1use alloc::vec::Vec;
9use super::BETTER_WINDOW_LOG;
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17use super::bt::BtMatcher;
18#[cfg(test)]
19use super::cost_model::HC_MAX_LIT;
20use super::cost_model::{
21 HC_BITCOST_MULTIPLIER, HC_FORMAT_MINMATCH, HC_OPT_NUM, HC_PREDEF_THRESHOLD, HcOptState,
22 HcOptimalCostProfile,
23};
24#[cfg(test)]
25use super::cost_model::{HC_BLOCKSIZE_MAX, HC_MAX_LL, HC_MAX_ML, HC_MAX_OFF, HcOptPriceType};
26use super::dfast::DfastMatchGenerator;
27#[cfg(test)]
28use super::match_table::helpers::FAST_HASH_FILL_STEP;
29#[cfg(test)]
30use super::match_table::helpers::INCOMPRESSIBLE_SKIP_STEP;
31use super::match_table::helpers::MIN_MATCH_LEN;
32#[cfg(test)]
33use super::match_table::helpers::common_prefix_len;
34#[cfg(test)]
35use super::opt::ldm::HcRawSeq;
36use super::opt::ldm::{HcOptLdmState, HcRawSeqStore};
37use super::opt::types::{
38 HcCandidateQuery, HcOptimalNode, HcOptimalPlanBuffers, HcOptimalPlanState, HcOptimalSequence,
39 MatchCandidate,
40};
41use super::row::RowMatchGenerator;
42use super::simple::{MatchGenerator, SuffixStore};
43#[cfg(all(
44 test,
45 feature = "std",
46 target_arch = "aarch64",
47 target_endian = "little"
48))]
49use std::arch::is_aarch64_feature_detected;
50#[cfg(all(test, feature = "std", target_arch = "x86_64"))]
51use std::arch::is_x86_feature_detected;
52
53pub(crate) const DFAST_MIN_MATCH_LEN: usize = 5;
54pub(crate) const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
55pub(crate) const ROW_MIN_MATCH_LEN: usize = 6;
56pub(crate) const DFAST_TARGET_LEN: usize = 48;
57pub(crate) const DFAST_HASH_BITS: usize = 17;
80pub(crate) const DFAST_SHORT_HASH_BITS_DELTA: usize = 1;
86pub(crate) const DFAST_EMPTY_SLOT: u32 = 0;
94
95pub(crate) const DFAST_REBASE_GUARD_BAND: u32 = 1u32 << 30;
102pub(crate) const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
103pub(crate) const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
104pub(crate) const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
105pub(crate) const DFAST_MAX_SKIP_STEP: usize = 8;
106pub(crate) const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
107pub(crate) const ROW_HASH_BITS: usize = 20;
108pub(crate) const ROW_LOG: usize = 5;
109pub(crate) const ROW_SEARCH_DEPTH: usize = 16;
110pub(crate) const ROW_TARGET_LEN: usize = 48;
111pub(crate) const ROW_TAG_BITS: usize = 8;
112pub(crate) const ROW_EMPTY_SLOT: usize = usize::MAX;
113pub(crate) const ROW_HASH_KEY_LEN: usize = 4;
114#[cfg(test)]
121use super::match_table::storage::{HC_PRIME3BYTES, HC_PRIME4BYTES};
122
123#[cfg(test)]
128use super::match_table::storage::HC_EMPTY;
129use super::match_table::storage::{HC_CHAIN_LOG, HC_HASH_LOG, HC3_HASH_LOG};
130const HC_SEARCH_DEPTH: usize = 16;
135use super::hc::HC_MIN_MATCH_LEN;
138const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
139const HC_TARGET_LEN: usize = 48;
140
141use super::hc::MAX_HC_SEARCH_DEPTH;
143
144#[derive(Copy, Clone)]
152struct HcConfig {
153 hash_log: usize,
154 chain_log: usize,
155 search_depth: usize,
156 target_len: usize,
157}
158
159#[derive(Copy, Clone)]
160pub(crate) struct RowConfig {
161 pub(crate) hash_bits: usize,
162 pub(crate) row_log: usize,
163 pub(crate) search_depth: usize,
164 pub(crate) target_len: usize,
165}
166
167const HC_CONFIG: HcConfig = HcConfig {
168 hash_log: HC_HASH_LOG,
169 chain_log: HC_CHAIN_LOG,
170 search_depth: HC_SEARCH_DEPTH,
171 target_len: HC_TARGET_LEN,
172};
173
174const BEST_HC_CONFIG: HcConfig = HcConfig {
176 hash_log: 21,
177 chain_log: 20,
178 search_depth: 32,
179 target_len: 128,
180};
181
182const BTOPT_HC_CONFIG: HcConfig = HcConfig {
183 hash_log: 23,
184 chain_log: 22,
185 search_depth: 32,
186 target_len: 256,
187};
188
189const BTULTRA_HC_CONFIG: HcConfig = HcConfig {
190 hash_log: 23,
191 chain_log: 23,
192 search_depth: 32,
193 target_len: 256,
194};
195
196const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
197 hash_log: 24,
198 chain_log: 24,
199 search_depth: 512,
200 target_len: 256,
201};
202
203const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
204 hash_log: 25,
205 chain_log: 27,
206 search_depth: 512,
207 target_len: 999,
208};
209
210const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
211 hash_log: 19,
212 chain_log: 19,
213 search_depth: 1 << 13,
214 target_len: 999,
215};
216
217const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
218 hash_log: 17,
219 chain_log: 18,
220 search_depth: 1 << 11,
221 target_len: 999,
222};
223
224const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
225 hash_log: 15,
226 chain_log: 15,
227 search_depth: 1 << 10,
228 target_len: 999,
229};
230
231const ROW_CONFIG: RowConfig = RowConfig {
232 hash_bits: ROW_HASH_BITS,
233 row_log: ROW_LOG,
234 search_depth: ROW_SEARCH_DEPTH,
235 target_len: ROW_TARGET_LEN,
236};
237
238#[derive(Copy, Clone)]
244struct LevelParams {
245 strategy_tag: super::strategy::StrategyTag,
246 window_log: u8,
247 hash_fill_step: usize,
248 lazy_depth: u8,
249 hc: HcConfig,
250 row: RowConfig,
251}
252
253impl LevelParams {
254 fn backend(&self) -> super::strategy::BackendTag {
258 self.strategy_tag.backend()
259 }
260}
261
262fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
263 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
264 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
265}
266
267fn row_hash_bits_for_window(max_window_size: usize) -> usize {
268 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
269 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
270}
271
272#[rustfmt::skip]
280const LEVEL_TABLE: [LevelParams; 22] = [
281 LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
284 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
285 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
286 LevelParams { strategy_tag: super::strategy::StrategyTag::Greedy, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
287 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
288 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
289 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
290 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
291 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
292 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
293 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
294 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
295 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
296 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
297 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
298 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
299 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
300 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
301 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
302 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
303 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
304 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 27, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG_L22, row: ROW_CONFIG },
305];
306
307pub(crate) const MIN_WINDOW_LOG: u8 = 10;
309const MIN_HINTED_WINDOW_LOG: u8 = 14;
315
316fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
326 let src_log = if src_size == 0 {
331 MIN_WINDOW_LOG
332 } else {
333 (64 - (src_size - 1).leading_zeros()) as u8 };
335 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
336 if src_log < params.window_log {
337 params.window_log = src_log;
338 }
339 let backend = params.backend();
343 if backend == super::strategy::BackendTag::HashChain {
344 if (src_log + 2) < params.hc.hash_log as u8 {
345 params.hc.hash_log = (src_log + 2) as usize;
346 }
347 if (src_log + 1) < params.hc.chain_log as u8 {
348 params.hc.chain_log = (src_log + 1) as usize;
349 }
350 } else if backend == super::strategy::BackendTag::Row {
351 let max_window_size = 1usize << params.window_log;
352 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
353 }
354 params
355}
356
357fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
358 let mut hc = match source_size {
359 Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
360 Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
361 Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
362 _ => BTULTRA2_HC_CONFIG_L22,
363 };
364 let mut window_log = match source_size {
365 Some(size) if size <= 16 * 1024 => 14,
366 Some(size) if size <= 128 * 1024 => 17,
367 Some(size) if size <= 256 * 1024 => 18,
368 _ => 27,
369 };
370 if let Some(size) = source_size
371 && size > 256 * 1024
372 {
373 let src_log = if size == 0 {
374 MIN_WINDOW_LOG
375 } else {
376 (64 - (size - 1).leading_zeros()) as u8
377 };
378 window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
379 let adjusted_table_log = window_log as usize + 1;
380 hc.hash_log = hc.hash_log.min(adjusted_table_log);
381 hc.chain_log = hc.chain_log.min(adjusted_table_log);
382 }
383 LevelParams {
384 strategy_tag: super::strategy::StrategyTag::BtUltra2,
385 window_log,
386 hash_fill_step: 1,
387 lazy_depth: 2,
388 hc,
389 row: ROW_CONFIG,
390 }
391}
392
393fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
396 if matches!(level, CompressionLevel::Level(22)) {
397 return level22_btultra2_params_for_source_size(source_size);
398 }
399 let params = match level {
400 CompressionLevel::Uncompressed => LevelParams {
401 strategy_tag: super::strategy::StrategyTag::Fast,
402 window_log: 17,
403 hash_fill_step: 1,
404 lazy_depth: 0,
405 hc: HC_CONFIG,
406 row: ROW_CONFIG,
407 },
408 CompressionLevel::Fastest => LEVEL_TABLE[0],
409 CompressionLevel::Default => LEVEL_TABLE[2],
410 CompressionLevel::Better => LEVEL_TABLE[6],
411 CompressionLevel::Best => LEVEL_TABLE[10],
412 CompressionLevel::Level(n) => {
413 if n > 0 {
414 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
415 LEVEL_TABLE[idx]
416 } else if n == 0 {
417 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
419 } else {
420 let acceleration =
424 (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
425 let step = (acceleration + 3).min(128);
426 LevelParams {
427 strategy_tag: super::strategy::StrategyTag::Fast,
428 window_log: 17,
429 hash_fill_step: step,
430 lazy_depth: 0,
431 hc: HC_CONFIG,
432 row: ROW_CONFIG,
433 }
434 }
435 }
436 };
437 if let Some(size) = source_size {
438 adjust_params_for_source_size(params, size)
439 } else {
440 params
441 }
442}
443
444enum MatcherStorage {
462 Simple(MatchGenerator),
469 Dfast(DfastMatchGenerator),
474 Row(RowMatchGenerator),
478 HashChain(HcMatchGenerator),
490}
491
492impl MatcherStorage {
493 fn backend(&self) -> super::strategy::BackendTag {
495 use super::strategy::BackendTag;
496 match self {
497 Self::Simple(_) => BackendTag::Simple,
498 Self::Dfast(_) => BackendTag::Dfast,
499 Self::Row(_) => BackendTag::Row,
500 Self::HashChain(_) => BackendTag::HashChain,
501 }
502 }
503}
504
505pub struct MatchGeneratorDriver {
507 vec_pool: Vec<Vec<u8>>,
508 suffix_pool: Vec<SuffixStore>,
509 storage: MatcherStorage,
516 strategy_tag: super::strategy::StrategyTag,
522 slice_size: usize,
523 base_slice_size: usize,
524 reported_window_size: usize,
527 dictionary_retained_budget: usize,
530 source_size_hint: Option<u64>,
532}
533
534impl MatchGeneratorDriver {
535 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
540 let max_window_size = max_slices_in_window * slice_size;
541 Self {
542 vec_pool: Vec::new(),
543 suffix_pool: Vec::new(),
544 storage: MatcherStorage::Simple(MatchGenerator::new(max_window_size)),
545 strategy_tag: super::strategy::StrategyTag::Fast,
546 slice_size,
547 base_slice_size: slice_size,
548 reported_window_size: max_window_size,
549 dictionary_retained_budget: 0,
550 source_size_hint: None,
551 }
552 }
553
554 fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
555 resolve_level_params(level, source_size)
556 }
557
558 fn active_backend(&self) -> super::strategy::BackendTag {
561 self.storage.backend()
562 }
563
564 #[cfg(test)]
565 fn simple(&self) -> &MatchGenerator {
566 match &self.storage {
567 MatcherStorage::Simple(m) => m,
568 _ => panic!("simple backend must be initialized by reset() before use"),
569 }
570 }
571
572 fn simple_mut(&mut self) -> &mut MatchGenerator {
573 match &mut self.storage {
574 MatcherStorage::Simple(m) => m,
575 _ => panic!("simple backend must be initialized by reset() before use"),
576 }
577 }
578
579 #[cfg(test)]
580 fn dfast_matcher(&self) -> &DfastMatchGenerator {
581 match &self.storage {
582 MatcherStorage::Dfast(m) => m,
583 _ => panic!("dfast backend must be initialized by reset() before use"),
584 }
585 }
586
587 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
588 match &mut self.storage {
589 MatcherStorage::Dfast(m) => m,
590 _ => panic!("dfast backend must be initialized by reset() before use"),
591 }
592 }
593
594 #[cfg(test)]
595 fn row_matcher(&self) -> &RowMatchGenerator {
596 match &self.storage {
597 MatcherStorage::Row(m) => m,
598 _ => panic!("row backend must be initialized by reset() before use"),
599 }
600 }
601
602 fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
603 match &mut self.storage {
604 MatcherStorage::Row(m) => m,
605 _ => panic!("row backend must be initialized by reset() before use"),
606 }
607 }
608
609 #[cfg(test)]
610 fn hc_matcher(&self) -> &HcMatchGenerator {
611 match &self.storage {
612 MatcherStorage::HashChain(m) => m,
613 _ => panic!("hash chain backend must be initialized by reset() before use"),
614 }
615 }
616
617 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
618 match &mut self.storage {
619 MatcherStorage::HashChain(m) => m,
620 _ => panic!("hash chain backend must be initialized by reset() before use"),
621 }
622 }
623
624 #[must_use]
633 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) -> bool {
634 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
635 if reclaimed == 0 {
636 return false;
637 }
638 self.dictionary_retained_budget -= reclaimed;
639 match self.active_backend() {
640 super::strategy::BackendTag::Simple => {
641 let matcher = self.simple_mut();
642 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
643 }
644 super::strategy::BackendTag::Dfast => {
645 let matcher = self.dfast_matcher_mut();
646 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
647 }
648 super::strategy::BackendTag::Row => {
649 let matcher = self.row_matcher_mut();
650 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
651 }
652 super::strategy::BackendTag::HashChain => {
653 let matcher = self.hc_matcher_mut();
654 matcher.table.max_window_size =
655 matcher.table.max_window_size.saturating_sub(reclaimed);
656 }
657 }
658 true
659 }
660
661 fn trim_after_budget_retire(&mut self) {
662 loop {
663 let mut evicted_bytes = 0usize;
664 match self.active_backend() {
665 super::strategy::BackendTag::Simple => {
666 let vec_pool = &mut self.vec_pool;
667 let suffix_pool = &mut self.suffix_pool;
668 let MatcherStorage::Simple(m) = &mut self.storage else {
669 unreachable!("active_backend() == Simple proven above");
670 };
671 m.reserve(0, |mut data, mut suffixes| {
672 evicted_bytes += data.len();
673 data.resize(data.capacity(), 0);
674 vec_pool.push(data);
675 suffixes.slots.clear();
676 suffixes.slots.resize(suffixes.slots.capacity(), None);
677 suffix_pool.push(suffixes);
678 });
679 }
680 super::strategy::BackendTag::Dfast => {
681 let dfast = self.dfast_matcher_mut();
690 let pre = dfast.window_size;
691 dfast.trim_to_window();
692 evicted_bytes += pre - dfast.window_size;
693 }
694 super::strategy::BackendTag::Row => {
695 let mut retired = Vec::new();
696 self.row_matcher_mut().trim_to_window(|data| {
697 evicted_bytes += data.len();
698 retired.push(data);
699 });
700 for mut data in retired {
701 data.resize(data.capacity(), 0);
702 self.vec_pool.push(data);
703 }
704 }
705 super::strategy::BackendTag::HashChain => {
706 let mut retired = Vec::new();
707 self.hc_matcher_mut().table.trim_to_window(|data| {
708 evicted_bytes += data.len();
709 retired.push(data);
710 });
711 for mut data in retired {
712 data.resize(data.capacity(), 0);
713 self.vec_pool.push(data);
714 }
715 }
716 }
717 if evicted_bytes == 0 {
718 break;
719 }
720 let _ = self.retire_dictionary_budget(evicted_bytes);
734 }
735 }
736
737 fn skip_matching_for_dictionary_priming(&mut self) {
738 match self.active_backend() {
739 super::strategy::BackendTag::Simple => {
740 self.simple_mut().skip_matching_with_hint(Some(false))
741 }
742 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
743 super::strategy::BackendTag::Row => {
744 self.row_matcher_mut().skip_matching_with_hint(Some(false))
745 }
746 super::strategy::BackendTag::HashChain => {
747 self.hc_matcher_mut().skip_matching(Some(false))
748 }
749 }
750 }
751}
752
753impl Matcher for MatchGeneratorDriver {
754 fn supports_dictionary_priming(&self) -> bool {
755 true
756 }
757
758 fn set_source_size_hint(&mut self, size: u64) {
759 self.source_size_hint = Some(size);
760 }
761
762 fn reset(&mut self, level: CompressionLevel) {
763 let hint = self.source_size_hint.take();
764 let hinted = hint.is_some();
765 let params = Self::level_params(level, hint);
766 let next_backend = params.backend();
767 let max_window_size = 1usize << params.window_log;
768 self.dictionary_retained_budget = 0;
769 if self.active_backend() != next_backend {
770 match &mut self.storage {
776 MatcherStorage::Simple(m) => {
777 let vec_pool = &mut self.vec_pool;
778 let suffix_pool = &mut self.suffix_pool;
779 m.reset(|mut data, mut suffixes| {
780 data.resize(data.capacity(), 0);
781 vec_pool.push(data);
782 suffixes.slots.clear();
783 suffixes.slots.resize(suffixes.slots.capacity(), None);
784 suffix_pool.push(suffixes);
785 });
786 }
787 MatcherStorage::Dfast(m) => {
788 m.short_hash = Vec::new();
801 m.long_hash = Vec::new();
802 m.reset();
803 }
804 MatcherStorage::Row(m) => {
805 m.row_heads = Vec::new();
806 m.row_positions = Vec::new();
807 m.row_tags = Vec::new();
808 let vec_pool = &mut self.vec_pool;
809 m.reset(|mut data| {
810 data.resize(data.capacity(), 0);
811 vec_pool.push(data);
812 });
813 }
814 MatcherStorage::HashChain(m) => {
815 m.table.hash_table = Vec::new();
823 m.table.chain_table = Vec::new();
824 m.table.hash3_table = Vec::new();
825 let vec_pool = &mut self.vec_pool;
826 m.reset(|mut data| {
827 data.resize(data.capacity(), 0);
828 vec_pool.push(data);
829 });
830 }
831 }
832 self.storage = match next_backend {
835 super::strategy::BackendTag::Simple => {
836 MatcherStorage::Simple(MatchGenerator::new(max_window_size))
837 }
838 super::strategy::BackendTag::Dfast => {
839 MatcherStorage::Dfast(DfastMatchGenerator::new(max_window_size))
840 }
841 super::strategy::BackendTag::Row => {
842 MatcherStorage::Row(RowMatchGenerator::new(max_window_size))
843 }
844 super::strategy::BackendTag::HashChain => {
845 MatcherStorage::HashChain(HcMatchGenerator::new(max_window_size))
846 }
847 };
848 }
849
850 self.strategy_tag = params.strategy_tag;
856 self.slice_size = self.base_slice_size.min(max_window_size);
857 self.reported_window_size = max_window_size;
858 let strategy_tag = self.strategy_tag;
859 match &mut self.storage {
860 MatcherStorage::Simple(m) => {
861 let vec_pool = &mut self.vec_pool;
862 let suffix_pool = &mut self.suffix_pool;
863 m.max_window_size = max_window_size;
864 m.hash_fill_step = params.hash_fill_step;
865 m.reset(|mut data, mut suffixes| {
866 data.resize(data.capacity(), 0);
867 vec_pool.push(data);
868 suffixes.slots.clear();
869 suffixes.slots.resize(suffixes.slots.capacity(), None);
870 suffix_pool.push(suffixes);
871 });
872 }
873 MatcherStorage::Dfast(dfast) => {
874 dfast.max_window_size = max_window_size;
875 dfast.lazy_depth = params.lazy_depth;
876 dfast.use_fast_loop = matches!(
877 level,
878 CompressionLevel::Default
879 | CompressionLevel::Level(0)
880 | CompressionLevel::Level(3)
881 );
882 dfast.set_hash_bits(if hinted {
883 dfast_hash_bits_for_window(max_window_size)
884 } else {
885 DFAST_HASH_BITS
886 });
887 dfast.reset();
891 }
892 MatcherStorage::Row(row) => {
893 row.max_window_size = max_window_size;
894 row.lazy_depth = params.lazy_depth;
895 row.configure(params.row);
896 if hinted {
897 row.set_hash_bits(row_hash_bits_for_window(max_window_size));
898 }
899 let vec_pool = &mut self.vec_pool;
900 row.reset(|mut data| {
901 data.resize(data.capacity(), 0);
902 vec_pool.push(data);
903 });
904 }
905 MatcherStorage::HashChain(hc) => {
906 hc.table.max_window_size = max_window_size;
907 hc.hc.lazy_depth = params.lazy_depth;
908 hc.configure(params.hc, strategy_tag, params.window_log);
909 let vec_pool = &mut self.vec_pool;
910 hc.reset(|mut data| {
911 data.resize(data.capacity(), 0);
912 vec_pool.push(data);
913 });
914 }
915 }
916 }
917
918 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
919 match self.active_backend() {
920 super::strategy::BackendTag::Simple => self.simple_mut().offset_hist = offset_hist,
921 super::strategy::BackendTag::Dfast => {
922 self.dfast_matcher_mut().offset_hist = offset_hist
923 }
924 super::strategy::BackendTag::Row => self.row_matcher_mut().offset_hist = offset_hist,
925 super::strategy::BackendTag::HashChain => {
926 let matcher = self.hc_matcher_mut();
927 matcher.table.offset_hist = offset_hist;
928 matcher.table.mark_dictionary_primed();
929 }
930 }
931
932 if dict_content.is_empty() {
933 return;
934 }
935
936 let retained_dict_budget = dict_content.len();
939 match self.active_backend() {
940 super::strategy::BackendTag::Simple => {
941 let matcher = self.simple_mut();
942 matcher.max_window_size =
943 matcher.max_window_size.saturating_add(retained_dict_budget);
944 }
945 super::strategy::BackendTag::Dfast => {
946 let matcher = self.dfast_matcher_mut();
947 matcher.max_window_size =
948 matcher.max_window_size.saturating_add(retained_dict_budget);
949 }
950 super::strategy::BackendTag::Row => {
951 let matcher = self.row_matcher_mut();
952 matcher.max_window_size =
953 matcher.max_window_size.saturating_add(retained_dict_budget);
954 }
955 super::strategy::BackendTag::HashChain => {
956 let matcher = self.hc_matcher_mut();
957 matcher.table.max_window_size = matcher
958 .table
959 .max_window_size
960 .saturating_add(retained_dict_budget);
961 }
962 }
963
964 let mut start = 0usize;
965 let mut committed_dict_budget = 0usize;
966 let min_primed_tail = match self.active_backend() {
970 super::strategy::BackendTag::Simple => MIN_MATCH_LEN,
971 super::strategy::BackendTag::Dfast
972 | super::strategy::BackendTag::Row
973 | super::strategy::BackendTag::HashChain => 4,
974 };
975 while start < dict_content.len() {
976 let end = (start + self.slice_size).min(dict_content.len());
977 if end - start < min_primed_tail {
978 break;
979 }
980 let mut space = self.get_next_space();
981 space.clear();
982 space.extend_from_slice(&dict_content[start..end]);
983 self.commit_space(space);
984 self.skip_matching_for_dictionary_priming();
985 committed_dict_budget += end - start;
986 start = end;
987 }
988
989 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
990 if uncommitted_tail_budget > 0 {
991 match self.active_backend() {
992 super::strategy::BackendTag::Simple => {
993 let matcher = self.simple_mut();
994 matcher.max_window_size = matcher
995 .max_window_size
996 .saturating_sub(uncommitted_tail_budget);
997 }
998 super::strategy::BackendTag::Dfast => {
999 let matcher = self.dfast_matcher_mut();
1000 matcher.max_window_size = matcher
1001 .max_window_size
1002 .saturating_sub(uncommitted_tail_budget);
1003 }
1004 super::strategy::BackendTag::Row => {
1005 let matcher = self.row_matcher_mut();
1006 matcher.max_window_size = matcher
1007 .max_window_size
1008 .saturating_sub(uncommitted_tail_budget);
1009 }
1010 super::strategy::BackendTag::HashChain => {
1011 let matcher = self.hc_matcher_mut();
1012 matcher.table.max_window_size = matcher
1013 .table
1014 .max_window_size
1015 .saturating_sub(uncommitted_tail_budget);
1016 }
1017 }
1018 }
1019 if committed_dict_budget > 0 {
1020 self.dictionary_retained_budget = self
1021 .dictionary_retained_budget
1022 .saturating_add(committed_dict_budget);
1023 }
1024 if self.active_backend() == super::strategy::BackendTag::HashChain {
1025 self.hc_matcher_mut()
1026 .table
1027 .set_dictionary_limit_from_primed_bytes(committed_dict_budget);
1028 }
1029 }
1030
1031 fn seed_dictionary_entropy(
1032 &mut self,
1033 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
1034 ll: Option<&crate::fse::fse_encoder::FSETable>,
1035 ml: Option<&crate::fse::fse_encoder::FSETable>,
1036 of: Option<&crate::fse::fse_encoder::FSETable>,
1037 ) {
1038 if self.active_backend() == super::strategy::BackendTag::HashChain {
1039 self.hc_matcher_mut()
1040 .seed_dictionary_entropy(huff, ll, ml, of);
1041 }
1042 }
1043
1044 fn window_size(&self) -> u64 {
1045 self.reported_window_size as u64
1046 }
1047
1048 fn get_next_space(&mut self) -> Vec<u8> {
1049 if let Some(mut space) = self.vec_pool.pop() {
1050 if space.len() > self.slice_size {
1051 space.truncate(self.slice_size);
1052 }
1053 if space.len() < self.slice_size {
1054 space.resize(self.slice_size, 0);
1055 }
1056 return space;
1057 }
1058 alloc::vec![0; self.slice_size]
1059 }
1060
1061 fn get_last_space(&mut self) -> &[u8] {
1062 match &self.storage {
1063 MatcherStorage::Simple(m) => m.window.last().unwrap().data.as_slice(),
1064 MatcherStorage::Dfast(m) => m.get_last_space(),
1065 MatcherStorage::Row(m) => m.get_last_space(),
1066 MatcherStorage::HashChain(m) => m.table.get_last_space(),
1067 }
1068 }
1069
1070 fn commit_space(&mut self, space: Vec<u8>) {
1071 let mut evicted_bytes = 0usize;
1072 let vec_pool = &mut self.vec_pool;
1076 let suffix_pool = &mut self.suffix_pool;
1077 match &mut self.storage {
1078 MatcherStorage::Simple(m) => {
1079 let suffixes = match suffix_pool.pop() {
1080 Some(store) if store.slots.len() >= space.len() => store,
1081 _ => SuffixStore::with_capacity(space.len()),
1082 };
1083 m.add_data(space, suffixes, |mut data, mut suffixes| {
1084 evicted_bytes += data.len();
1085 data.resize(data.capacity(), 0);
1086 vec_pool.push(data);
1087 suffixes.slots.clear();
1088 suffixes.slots.resize(suffixes.slots.capacity(), None);
1089 suffix_pool.push(suffixes);
1090 });
1091 }
1092 MatcherStorage::Dfast(m) => {
1093 let pre = m.window_size;
1115 let space_len = space.len();
1116 m.add_data(space, |mut data| {
1117 data.resize(data.capacity(), 0);
1118 vec_pool.push(data);
1119 });
1120 evicted_bytes += pre.saturating_add(space_len).saturating_sub(m.window_size);
1121 }
1122 MatcherStorage::Row(m) => {
1123 m.add_data(space, |mut data| {
1124 evicted_bytes += data.len();
1125 data.resize(data.capacity(), 0);
1126 vec_pool.push(data);
1127 });
1128 }
1129 MatcherStorage::HashChain(m) => {
1130 m.table.add_data(space, |mut data| {
1131 evicted_bytes += data.len();
1132 data.resize(data.capacity(), 0);
1133 vec_pool.push(data);
1134 });
1135 }
1136 }
1137 if self.retire_dictionary_budget(evicted_bytes) {
1147 self.trim_after_budget_retire();
1148 }
1149 }
1150
1151 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1152 use super::strategy::{self, StrategyTag};
1153 match self.strategy_tag {
1162 StrategyTag::Fast => self.compress_block::<strategy::Fast>(&mut handle_sequence),
1163 StrategyTag::Dfast => self.compress_block::<strategy::Dfast>(&mut handle_sequence),
1164 StrategyTag::Greedy => self.compress_block::<strategy::Greedy>(&mut handle_sequence),
1165 StrategyTag::Lazy => self.compress_block::<strategy::Lazy>(&mut handle_sequence),
1166 StrategyTag::BtOpt => self.compress_block::<strategy::BtOpt>(&mut handle_sequence),
1167 StrategyTag::BtUltra => self.compress_block::<strategy::BtUltra>(&mut handle_sequence),
1168 StrategyTag::BtUltra2 => {
1169 self.compress_block::<strategy::BtUltra2>(&mut handle_sequence)
1170 }
1171 }
1172 }
1173
1174 fn skip_matching(&mut self) {
1175 self.skip_matching_with_hint(None);
1176 }
1177
1178 fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
1179 match self.active_backend() {
1180 super::strategy::BackendTag::Simple => self
1181 .simple_mut()
1182 .skip_matching_with_hint(incompressible_hint),
1183 super::strategy::BackendTag::Dfast => {
1184 self.dfast_matcher_mut().skip_matching(incompressible_hint)
1185 }
1186 super::strategy::BackendTag::Row => self
1187 .row_matcher_mut()
1188 .skip_matching_with_hint(incompressible_hint),
1189 super::strategy::BackendTag::HashChain => {
1190 self.hc_matcher_mut().skip_matching(incompressible_hint)
1191 }
1192 }
1193 }
1194}
1195
1196impl MatchGeneratorDriver {
1197 fn compress_block<S: super::strategy::Strategy>(
1206 &mut self,
1207 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1208 ) {
1209 use super::strategy::BackendTag;
1210 match S::BACKEND {
1211 BackendTag::Simple => {
1212 let matcher = self.simple_mut();
1220 while matcher.next_sequence(&mut *handle_sequence) {}
1221 }
1222 BackendTag::Dfast => self.dfast_matcher_mut().start_matching(handle_sequence),
1223 BackendTag::Row => self.row_matcher_mut().start_matching(handle_sequence),
1224 BackendTag::HashChain => self
1225 .hc_matcher_mut()
1226 .start_matching_strategy::<S>(handle_sequence),
1227 }
1228 }
1229}
1230
1231pub(crate) enum HcBackend {
1245 Hc,
1247 Bt(alloc::boxed::Box<super::bt::BtMatcher>),
1251}
1252
1253impl HcBackend {
1254 #[inline(always)]
1261 pub(crate) fn bt_mut(&mut self) -> &mut super::bt::BtMatcher {
1262 match self {
1263 Self::Bt(bt) => bt,
1264 Self::Hc => unreachable!("BT-only accessor called in HC mode"),
1265 }
1266 }
1267}
1268
1269struct HcMatchGenerator {
1270 table: super::match_table::storage::MatchTable,
1275 hc: super::hc::HcMatcher,
1279 backend: HcBackend,
1284 strategy_tag: super::strategy::StrategyTag,
1296}
1297
1298macro_rules! bt_insert_step_no_rebase_body {
1314 ($table:expr, $search_depth:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
1315 let idx = $abs_pos - $table.history_abs_start;
1316 let concat = &$table.history[$table.history_start..];
1317 if idx + 8 > concat.len() {
1318 return 1;
1319 }
1320 debug_assert!(
1321 $abs_pos <= $current_abs_end,
1322 "BT walker called past current block end"
1323 );
1324 let tail_limit = $current_abs_end - $abs_pos;
1325 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
1326 concat,
1327 idx,
1328 $table.hash_log,
1329 $crate::encoding::bt::BtMatcher::HASH_MLS,
1330 );
1331 let Some(relative_pos) = $table.relative_position($abs_pos) else {
1332 return 1;
1333 };
1334 let stored = relative_pos + 1;
1335 let bt_mask = $table.bt_mask();
1336 let bt_low = $abs_pos.saturating_sub(bt_mask);
1342 let window_low = $table.window_low_abs_for_target($target_abs);
1343 let mut match_end_abs = $abs_pos + 9;
1352 let mut best_len = 8usize;
1353 let mut compares_left = $search_depth;
1354 let mut common_length_smaller = 0usize;
1355 let mut common_length_larger = 0usize;
1356 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
1357 let mut smaller_slot = pair_idx;
1358 let mut larger_slot = pair_idx + 1;
1359 let mut match_stored = $table.hash_table[hash];
1360 $table.hash_table[hash] = stored;
1361
1362 while compares_left > 0 {
1363 let Some(candidate_abs) =
1364 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
1365 match_stored,
1366 $table.position_base,
1367 $table.index_shift,
1368 )
1369 else {
1370 break;
1371 };
1372 if candidate_abs < window_low || candidate_abs >= $abs_pos {
1373 break;
1374 }
1375 compares_left -= 1;
1376
1377 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
1378 let next_smaller = $table.chain_table[next_pair_idx];
1379 let next_larger = $table.chain_table[next_pair_idx + 1];
1380 let seed_len = common_length_smaller.min(common_length_larger);
1381 let candidate_idx = candidate_abs - $table.history_abs_start;
1382 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
1387
1388 if match_len > best_len {
1389 best_len = match_len;
1390 let candidate_end = candidate_abs + match_len;
1394 if candidate_end > match_end_abs {
1395 match_end_abs = candidate_end;
1396 }
1397 }
1398
1399 if match_len >= tail_limit {
1400 break;
1401 }
1402
1403 let candidate_next = candidate_idx + match_len;
1404 let current_next = idx + match_len;
1405 if concat[candidate_next] < concat[current_next] {
1406 $table.chain_table[smaller_slot] = match_stored;
1407 common_length_smaller = match_len;
1408 if candidate_abs <= bt_low {
1409 smaller_slot = usize::MAX;
1410 break;
1411 }
1412 smaller_slot = next_pair_idx + 1;
1413 match_stored = next_larger;
1414 } else {
1415 $table.chain_table[larger_slot] = match_stored;
1416 common_length_larger = match_len;
1417 if candidate_abs <= bt_low {
1418 larger_slot = usize::MAX;
1419 break;
1420 }
1421 larger_slot = next_pair_idx;
1422 match_stored = next_smaller;
1423 }
1424 }
1425
1426 if smaller_slot != usize::MAX {
1427 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1428 }
1429 if larger_slot != usize::MAX {
1430 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1431 }
1432
1433 let speed_positions = if best_len > 384 {
1434 (best_len - 384).min(192)
1435 } else {
1436 0
1437 };
1438 speed_positions.max(match_end_abs - ($abs_pos + 8))
1448 }};
1449}
1450pub(crate) use bt_insert_step_no_rebase_body;
1451
1452macro_rules! build_optimal_plan_impl_body {
1467 (
1468 $self:expr,
1469 $strategy_ty:ty,
1470 $current:ident,
1471 $current_abs_start:ident,
1472 $current_len:ident,
1473 $initial_state:ident,
1474 $stats:ident,
1475 $out:ident,
1476 $collect:ident $(,)?
1477 ) => {{
1478 let current_abs_end = $current_abs_start + $current_len;
1479 let min_match_len = HC_OPT_MIN_MATCH_LEN;
1480 let frontier_limit = $current_len.min(HC_OPT_NUM - 1);
1482 let initial_reps = $initial_state.reps;
1483 let initial_litlen = $initial_state.litlen;
1484 let mut profile = $initial_state.profile;
1485 profile.sufficient_match_len = $self.hc.sufficient_match_len_for_pass(profile);
1486 debug_assert!(
1498 <$strategy_ty as super::strategy::Strategy>::USE_BT,
1499 "build_optimal_plan_impl_body called on non-BT strategy"
1500 );
1501 let abort_on_worse_match: bool =
1502 <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL == 0;
1503 let opt_level: bool = <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL >= 2;
1504 let mut nodes = core::mem::take(&mut $self.backend.bt_mut().opt_nodes_scratch);
1505 let frontier_buffer_size = frontier_limit + 2;
1507 if nodes.len() < frontier_buffer_size {
1508 nodes.resize(frontier_buffer_size, HcOptimalNode::default());
1509 }
1510 let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);
1511 candidates.clear();
1512 if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
1513 candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
1514 }
1515 let mut store = core::mem::take(&mut $self.backend.bt_mut().opt_store_scratch);
1516 store.clear();
1517 let mut ll_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_scratch);
1518 let mut ll_price_generations =
1519 core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_generation);
1520 if ll_prices.len() <= frontier_limit {
1521 ll_prices.resize(frontier_limit + 1, 0);
1522 ll_price_generations.resize(frontier_limit + 1, 0);
1523 }
1524 $self.backend.bt_mut().opt_ll_price_stamp = $self
1525 .backend
1526 .bt_mut()
1527 .opt_ll_price_stamp
1528 .wrapping_add(1)
1529 .max(1);
1530 let ll_price_stamp = $self.backend.bt_mut().opt_ll_price_stamp;
1531 $self.backend.bt_mut().opt_lit_price_stamp = $self
1532 .backend
1533 .bt_mut()
1534 .opt_lit_price_stamp
1535 .wrapping_add(1)
1536 .max(1);
1537 let lit_price_stamp = $self.backend.bt_mut().opt_lit_price_stamp;
1538 let mut ml_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_scratch);
1539 let mut ml_price_generations =
1540 core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_generation);
1541 if ml_prices.len() <= frontier_limit {
1542 ml_prices.resize(frontier_limit + 1, 0);
1543 ml_price_generations.resize(frontier_limit + 1, 0);
1544 }
1545 $self.backend.bt_mut().opt_ml_price_stamp = $self
1546 .backend
1547 .bt_mut()
1548 .opt_ml_price_stamp
1549 .wrapping_add(1)
1550 .max(1);
1551 let ml_price_stamp = $self.backend.bt_mut().opt_ml_price_stamp;
1552 nodes[0] = HcOptimalNode {
1553 price: BtMatcher::cached_lit_length_price(
1554 profile,
1555 $stats,
1556 initial_litlen,
1557 &mut ll_prices,
1558 &mut ll_price_generations,
1559 ll_price_stamp,
1560 ),
1561 litlen: initial_litlen as u32,
1562 reps: initial_reps,
1563 ..HcOptimalNode::default()
1564 };
1565 let sufficient_len = profile.sufficient_match_len;
1566 let ll0_price = BtMatcher::cached_lit_length_price(
1567 profile,
1568 $stats,
1569 0,
1570 &mut ll_prices,
1571 &mut ll_price_generations,
1572 ll_price_stamp,
1573 );
1574 let ll1_price = BtMatcher::cached_lit_length_price(
1575 profile,
1576 $stats,
1577 1,
1578 &mut ll_prices,
1579 &mut ll_price_generations,
1580 ll_price_stamp,
1581 );
1582 let mut pos = 1usize;
1583 let mut last_pos = 0usize;
1584 let mut forced_end: Option<usize> = None;
1585 let mut forced_end_state: Option<HcOptimalNode> = None;
1586 let mut seed_forced_shortest_path = false;
1587 let mut opt_ldm = HcOptLdmState {
1588 seq_store: HcRawSeqStore {
1589 pos: 0,
1590 pos_in_sequence: 0,
1591 size: $self.backend.bt_mut().ldm_sequences.len(),
1592 },
1593 ..HcOptLdmState::default()
1594 };
1595 let has_ldm = !$self.backend.bt_mut().ldm_sequences.is_empty();
1596 if has_ldm {
1597 $self
1598 .backend
1599 .bt_mut()
1600 .ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
1601 }
1602
1603 if $current_len >= min_match_len {
1606 let seed_ldm = if has_ldm {
1607 $self.backend.bt_mut().ldm_process_match_candidate(
1608 &mut opt_ldm,
1609 0,
1610 $current_len,
1611 min_match_len,
1612 )
1613 } else {
1614 None
1615 };
1616 candidates.clear();
1617 unsafe {
1621 $self.$collect::<$strategy_ty, true>(
1622 $current_abs_start,
1623 current_abs_end,
1624 profile,
1625 HcCandidateQuery {
1626 reps: initial_reps,
1627 lit_len: initial_litlen,
1628 ldm_candidate: seed_ldm,
1629 },
1630 &mut candidates,
1631 )
1632 };
1633 if !candidates.is_empty() {
1634 last_pos = (min_match_len - 1).min(frontier_limit);
1636 for p in 1..min_match_len.min(nodes.len()) {
1637 BtMatcher::reset_opt_node(&mut nodes[p]);
1638 let seed_litlen = initial_litlen
1648 .checked_add(p)
1649 .and_then(|s| u32::try_from(s).ok())
1650 .expect("optimal parser seed litlen out of u32 range");
1651 nodes[p].litlen = seed_litlen;
1652 }
1653 }
1654
1655 if let Some(candidate) = candidates.last() {
1656 let longest_len = candidate.match_len.min($current_len);
1657 if longest_len > sufficient_len {
1658 let off_base = BtMatcher::encode_offset_base_with_reps(
1659 candidate.offset as u32,
1660 initial_litlen,
1661 initial_reps,
1662 );
1663 let off_price = profile
1664 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1665 let ml_price = BtMatcher::cached_match_length_price(
1666 profile,
1667 $stats,
1668 longest_len,
1669 &mut ml_prices,
1670 &mut ml_price_generations,
1671 ml_price_stamp,
1672 );
1673 let seq_cost = BtMatcher::add_prices(
1674 ll0_price,
1675 profile.match_price_from_parts(off_price, ml_price, $stats),
1676 );
1677 let forced_price = BtMatcher::add_prices(nodes[0].price, seq_cost);
1678 let forced_state = HcOptimalNode {
1679 price: forced_price,
1680 off: candidate.offset as u32,
1681 mlen: longest_len as u32,
1682 litlen: 0,
1683 reps: initial_reps,
1684 };
1685 if longest_len < nodes.len() && forced_price < nodes[longest_len].price {
1686 nodes[longest_len] = forced_state;
1687 }
1688 forced_end = Some(longest_len);
1689 forced_end_state = Some(forced_state);
1690 seed_forced_shortest_path = true;
1691 }
1692 }
1693 if !seed_forced_shortest_path {
1694 let mut prev_max_len = min_match_len - 1;
1695 for candidate in candidates.iter() {
1696 let max_match_len = candidate.match_len.min(frontier_limit);
1697 if max_match_len < min_match_len {
1698 continue;
1699 }
1700 let start_len = (prev_max_len + 1).max(min_match_len);
1701 if start_len > max_match_len {
1702 prev_max_len = prev_max_len.max(max_match_len);
1703 continue;
1704 }
1705 if max_match_len > last_pos {
1706 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
1707 }
1708 let off_base = BtMatcher::encode_offset_base_with_reps(
1709 candidate.offset as u32,
1710 initial_litlen,
1711 initial_reps,
1712 );
1713 let off_price = profile
1714 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1715 debug_assert!(max_match_len < nodes.len());
1716 let nodes0_price = nodes[0].price;
1717 for match_len in (start_len..=max_match_len).rev() {
1718 let ml_price = BtMatcher::cached_match_length_price(
1719 profile,
1720 $stats,
1721 match_len,
1722 &mut ml_prices,
1723 &mut ml_price_generations,
1724 ml_price_stamp,
1725 );
1726 let seq_cost = BtMatcher::add_prices(
1727 ll0_price,
1728 profile.match_price_from_parts(off_price, ml_price, $stats),
1729 );
1730 let next_cost = BtMatcher::add_prices(nodes0_price, seq_cost);
1731 let node_price = unsafe { nodes.get_unchecked(match_len).price };
1732 if match_len > last_pos || next_cost < node_price {
1733 let slot = unsafe { nodes.get_unchecked_mut(match_len) };
1734 *slot = HcOptimalNode {
1735 price: next_cost,
1736 off: candidate.offset as u32,
1737 mlen: match_len as u32,
1738 litlen: 0,
1739 reps: initial_reps,
1740 };
1741 if match_len > last_pos {
1742 last_pos = match_len;
1743 }
1744 } else if abort_on_worse_match {
1745 break;
1746 }
1747 }
1748 prev_max_len = prev_max_len.max(max_match_len);
1749 }
1750 if last_pos + 1 < nodes.len() {
1751 nodes[last_pos + 1].price = u32::MAX;
1752 }
1753 }
1754 }
1755 while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
1756 debug_assert!(pos + 1 < nodes.len());
1757 let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
1758 if prev_node.price != u32::MAX {
1759 let lit_len = prev_node.litlen as usize + 1;
1760 let lit_price = {
1761 let bt = $self.backend.bt_mut();
1762 BtMatcher::cached_literal_price(
1763 profile,
1764 $stats,
1765 $current[pos - 1],
1766 &mut bt.opt_lit_price_scratch,
1767 &mut bt.opt_lit_price_generation,
1768 lit_price_stamp,
1769 )
1770 };
1771 let ll_delta = BtMatcher::cached_lit_length_delta_price(
1772 profile,
1773 $stats,
1774 lit_len,
1775 &mut ll_prices,
1776 &mut ll_price_generations,
1777 ll_price_stamp,
1778 );
1779 let lit_cost = BtMatcher::add_price_delta(prev_node.price, lit_price, ll_delta);
1780 let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
1781 if lit_cost <= node_pos_price {
1782 let prev_match = unsafe { *nodes.get_unchecked(pos) };
1783 let slot = unsafe { nodes.get_unchecked_mut(pos) };
1784 *slot = prev_node;
1785 slot.litlen = lit_len as u32;
1786 slot.price = lit_cost;
1787 #[allow(clippy::collapsible_if)]
1788 if opt_level
1789 && prev_match.mlen > 0
1790 && prev_match.litlen == 0
1791 && pos < $current_len
1792 {
1793 if ll1_price < ll0_price {
1794 let next_lit_price = {
1795 let bt = $self.backend.bt_mut();
1796 BtMatcher::cached_literal_price(
1797 profile,
1798 $stats,
1799 $current[pos],
1800 &mut bt.opt_lit_price_scratch,
1801 &mut bt.opt_lit_price_generation,
1802 lit_price_stamp,
1803 )
1804 };
1805 let with1literal = BtMatcher::add_price_delta(
1806 prev_match.price,
1807 next_lit_price,
1808 ll1_price as i32 - ll0_price as i32,
1809 );
1810 let ll_delta_next = BtMatcher::cached_lit_length_delta_price(
1811 profile,
1812 $stats,
1813 lit_len + 1,
1814 &mut ll_prices,
1815 &mut ll_price_generations,
1816 ll_price_stamp,
1817 );
1818 let with_more_literals =
1819 BtMatcher::add_price_delta(lit_cost, next_lit_price, ll_delta_next);
1820 let next = pos + 1;
1821 let next_price = unsafe { nodes.get_unchecked(next).price };
1822 if with1literal < with_more_literals && with1literal < next_price {
1823 debug_assert!(pos >= prev_match.mlen as usize);
1825 let prev_pos = pos - prev_match.mlen as usize;
1826 {
1827 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
1828 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
1829 prev_match.off,
1830 prev_state.litlen as usize,
1831 prev_state.reps,
1832 );
1833 let slot = unsafe { nodes.get_unchecked_mut(next) };
1834 *slot = prev_match;
1835 slot.reps = reps_after_match;
1836 slot.litlen = 1;
1837 slot.price = with1literal;
1838 if next > last_pos {
1839 last_pos = next;
1840 }
1841 }
1842 }
1843 }
1844 }
1845 }
1846 }
1847
1848 let mut base_node = unsafe { *nodes.get_unchecked(pos) };
1849 if base_node.price == u32::MAX {
1850 pos += 1;
1851 continue;
1852 }
1853 if base_node.mlen > 0 && base_node.litlen == 0 {
1854 debug_assert!(pos >= base_node.mlen as usize);
1856 let prev_pos = pos - base_node.mlen as usize;
1857 {
1858 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
1859 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
1860 base_node.off,
1861 prev_state.litlen as usize,
1862 prev_state.reps,
1863 );
1864 base_node.reps = reps_after_match;
1865 unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
1866 }
1867 }
1868 let base_cost = base_node.price;
1869
1870 if pos + 8 > $current_len {
1871 pos += 1;
1872 continue;
1873 }
1874
1875 if pos == last_pos {
1876 break;
1877 }
1878
1879 let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
1880 if abort_on_worse_match
1881 && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
1882 {
1883 pos += 1;
1884 continue;
1885 }
1886
1887 let abs_pos = $current_abs_start + pos;
1888 let ldm_candidate = if has_ldm {
1889 $self.backend.bt_mut().ldm_process_match_candidate(
1890 &mut opt_ldm,
1891 pos,
1892 $current_len - pos,
1893 min_match_len,
1894 )
1895 } else {
1896 None
1897 };
1898 candidates.clear();
1899 unsafe {
1901 $self.$collect::<$strategy_ty, true>(
1902 abs_pos,
1903 current_abs_end,
1904 profile,
1905 HcCandidateQuery {
1906 reps: base_node.reps,
1907 lit_len: base_node.litlen as usize,
1908 ldm_candidate,
1909 },
1910 &mut candidates,
1911 )
1912 };
1913 if let Some(candidate) = candidates.last() {
1914 let longest_len = candidate.match_len.min($current_len - pos);
1915 if longest_len > sufficient_len
1916 || pos + longest_len >= HC_OPT_NUM
1917 || pos + longest_len >= $current_len
1918 {
1919 let lit_len = base_node.litlen as usize;
1920 let off_base = BtMatcher::encode_offset_base_with_reps(
1921 candidate.offset as u32,
1922 lit_len,
1923 base_node.reps,
1924 );
1925 let off_price = profile
1926 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1927 let ml_price = BtMatcher::cached_match_length_price(
1928 profile,
1929 $stats,
1930 longest_len,
1931 &mut ml_prices,
1932 &mut ml_price_generations,
1933 ml_price_stamp,
1934 );
1935 let seq_cost = BtMatcher::add_prices(
1936 ll0_price,
1937 profile.match_price_from_parts(off_price, ml_price, $stats),
1938 );
1939 let forced_price = BtMatcher::add_prices(base_cost, seq_cost);
1940 let end_pos = (pos + longest_len).min($current_len);
1941 forced_end = Some(end_pos);
1942 forced_end_state = Some(HcOptimalNode {
1943 price: forced_price,
1944 off: candidate.offset as u32,
1945 mlen: longest_len as u32,
1946 litlen: 0,
1947 reps: base_node.reps,
1948 });
1949 break;
1950 }
1951 }
1952 let mut prev_max_len = min_match_len - 1;
1953 for candidate in candidates.iter() {
1954 debug_assert!(pos <= frontier_limit);
1958 let max_match_len = candidate
1959 .match_len
1960 .min($current_len - pos)
1961 .min(frontier_limit - pos);
1962 let min_len = min_match_len;
1963 if max_match_len < min_len {
1964 continue;
1965 }
1966 let start_len = (prev_max_len + 1).max(min_len);
1967 if start_len > max_match_len {
1968 prev_max_len = prev_max_len.max(max_match_len);
1969 continue;
1970 }
1971 let max_next = pos + max_match_len;
1972 if max_next > last_pos {
1973 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
1974 }
1975 let lit_len = base_node.litlen as usize;
1976 let off_base = BtMatcher::encode_offset_base_with_reps(
1977 candidate.offset as u32,
1978 lit_len,
1979 base_node.reps,
1980 );
1981 let off_price = profile
1982 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1983 debug_assert!(pos + max_match_len < nodes.len());
1984 for match_len in (start_len..=max_match_len).rev() {
1985 let next = pos + match_len;
1986 let ml_price = BtMatcher::cached_match_length_price(
1987 profile,
1988 $stats,
1989 match_len,
1990 &mut ml_prices,
1991 &mut ml_price_generations,
1992 ml_price_stamp,
1993 );
1994 let seq_cost = BtMatcher::add_prices(
1995 ll0_price,
1996 profile.match_price_from_parts(off_price, ml_price, $stats),
1997 );
1998 let next_cost = BtMatcher::add_prices(base_cost, seq_cost);
1999 let node_next_price = unsafe { nodes.get_unchecked(next).price };
2000 let improved = next > last_pos || next_cost < node_next_price;
2001 if improved {
2002 let slot = unsafe { nodes.get_unchecked_mut(next) };
2003 *slot = HcOptimalNode {
2004 price: next_cost,
2005 off: candidate.offset as u32,
2006 mlen: match_len as u32,
2007 litlen: 0,
2008 reps: base_node.reps,
2009 };
2010 if next > last_pos {
2011 last_pos = next;
2012 }
2013 } else if abort_on_worse_match {
2014 break;
2015 }
2016 }
2017 prev_max_len = prev_max_len.max(max_match_len);
2018 }
2019
2020 if last_pos + 1 < nodes.len() {
2021 unsafe {
2022 nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
2023 }
2024 }
2025 pos += 1;
2026 }
2027
2028 if last_pos == 0 {
2029 if $current_len == 0 {
2030 let price = nodes[0].price;
2031 return $self.backend.bt_mut().finish_optimal_plan(
2032 HcOptimalPlanBuffers {
2033 nodes,
2034 candidates,
2035 store,
2036 ll_prices,
2037 ll_price_generations,
2038 ml_prices,
2039 ml_price_generations,
2040 },
2041 (price, initial_reps, initial_litlen, 0),
2042 );
2043 }
2044 let lit_price = {
2045 let bt = $self.backend.bt_mut();
2046 BtMatcher::cached_literal_price(
2047 profile,
2048 $stats,
2049 $current[0],
2050 &mut bt.opt_lit_price_scratch,
2051 &mut bt.opt_lit_price_generation,
2052 lit_price_stamp,
2053 )
2054 };
2055 let next_litlen = initial_litlen
2062 .checked_add(1)
2063 .expect("optimal parser next litlen out of usize range");
2064 let ll_delta = BtMatcher::cached_lit_length_delta_price(
2065 profile,
2066 $stats,
2067 next_litlen,
2068 &mut ll_prices,
2069 &mut ll_price_generations,
2070 ll_price_stamp,
2071 );
2072 let price = BtMatcher::add_price_delta(nodes[0].price, lit_price, ll_delta);
2073 return $self.backend.bt_mut().finish_optimal_plan(
2074 HcOptimalPlanBuffers {
2075 nodes,
2076 candidates,
2077 store,
2078 ll_prices,
2079 ll_price_generations,
2080 ml_prices,
2081 ml_price_generations,
2082 },
2083 (price, initial_reps, next_litlen, 1),
2084 );
2085 }
2086
2087 let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
2088 let last_stretch = if let Some(forced_state) = forced_end_state {
2089 forced_state
2090 } else {
2091 nodes[target_pos]
2092 };
2093 if last_stretch.price == u32::MAX {
2094 return $self.backend.bt_mut().finish_optimal_plan(
2095 HcOptimalPlanBuffers {
2096 nodes,
2097 candidates,
2098 store,
2099 ll_prices,
2100 ll_price_generations,
2101 ml_prices,
2102 ml_price_generations,
2103 },
2104 (u32::MAX, initial_reps, initial_litlen, $current_len),
2105 );
2106 }
2107
2108 if last_stretch.mlen == 0 {
2109 return $self.backend.bt_mut().finish_optimal_plan(
2110 HcOptimalPlanBuffers {
2111 nodes,
2112 candidates,
2113 store,
2114 ll_prices,
2115 ll_price_generations,
2116 ml_prices,
2117 ml_price_generations,
2118 },
2119 (
2120 last_stretch.price,
2121 last_stretch.reps,
2122 last_stretch.litlen as usize,
2123 target_pos.min($current_len),
2124 ),
2125 );
2126 }
2127
2128 let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
2129 let end_reps = if last_stretch.litlen == 0 {
2130 let prev_state = nodes[cur];
2131 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2132 last_stretch.off,
2133 prev_state.litlen as usize,
2134 prev_state.reps,
2135 );
2136 reps_after_match
2137 } else {
2138 let tail_literals = last_stretch.litlen as usize;
2139 if cur < tail_literals {
2140 return $self.backend.bt_mut().finish_optimal_plan(
2141 HcOptimalPlanBuffers {
2142 nodes,
2143 candidates,
2144 store,
2145 ll_prices,
2146 ll_price_generations,
2147 ml_prices,
2148 ml_price_generations,
2149 },
2150 (
2151 last_stretch.price,
2152 last_stretch.reps,
2153 tail_literals,
2154 target_pos.min($current_len),
2155 ),
2156 );
2157 }
2158 cur -= tail_literals;
2159 last_stretch.reps
2160 };
2161 let store_end = cur + 2;
2162 if store.len() <= store_end {
2163 store.resize(store_end + 1, HcOptimalNode::default());
2164 }
2165 let mut store_start;
2166 let mut stretch_pos = cur;
2167
2168 if last_stretch.litlen > 0 {
2169 store[store_end] = HcOptimalNode {
2170 litlen: last_stretch.litlen,
2171 mlen: 0,
2172 ..HcOptimalNode::default()
2173 };
2174 store_start = store_end.saturating_sub(1);
2175 store[store_start] = last_stretch;
2176 }
2177 store[store_end] = last_stretch;
2178 store_start = store_end;
2179
2180 loop {
2181 let next_stretch = nodes[stretch_pos];
2182 store[store_start].litlen = next_stretch.litlen;
2183 if next_stretch.mlen == 0 {
2184 break;
2185 }
2186 if store_start == 0 {
2187 break;
2188 }
2189 store_start -= 1;
2190 store[store_start] = next_stretch;
2191 let litlen = next_stretch.litlen as usize;
2198 let mlen = next_stretch.mlen as usize;
2199 debug_assert!(litlen + mlen <= $current_len);
2200 let step = litlen + mlen;
2201 if step == 0 || stretch_pos < step {
2202 break;
2203 }
2204 stretch_pos -= step;
2205 }
2206
2207 let mut tail_literals = initial_litlen;
2208 let mut store_pos = store_start;
2209 while store_pos <= store_end {
2210 let stretch = store[store_pos];
2211 let llen = stretch.litlen as usize;
2212 let mlen = stretch.mlen as usize;
2213 if mlen == 0 {
2214 tail_literals = llen;
2215 store_pos += 1;
2216 continue;
2217 }
2218 $out.push(HcOptimalSequence {
2219 offset: stretch.off,
2220 match_len: mlen as u32,
2221 lit_len: llen as u32,
2222 });
2223 tail_literals = 0;
2224 store_pos += 1;
2225 }
2226 let result = (
2227 last_stretch.price,
2228 end_reps,
2229 if last_stretch.litlen > 0 {
2230 last_stretch.litlen as usize
2231 } else {
2232 tail_literals
2233 },
2234 target_pos.min($current_len),
2235 );
2236 $self.backend.bt_mut().finish_optimal_plan(
2237 HcOptimalPlanBuffers {
2238 nodes,
2239 candidates,
2240 store,
2241 ll_prices,
2242 ll_price_generations,
2243 ml_prices,
2244 ml_price_generations,
2245 },
2246 result,
2247 )
2248 }};
2249}
2250
2251macro_rules! collect_optimal_candidates_initialized_body {
2260 (
2261 $self:expr,
2262 $strategy_ty:ty,
2263 $abs_pos:ident,
2264 $current_abs_end:ident,
2265 $profile:ident,
2266 $query:ident,
2267 $out:ident,
2268 $bt_matchfinder:ident,
2269 $bt_update:ident,
2270 $bt_insert:ident,
2271 $for_each_rep:ident,
2272 $hash3:ident,
2273 $cpl:path $(,)?
2274 ) => {{
2275 let use_hash3: bool = <$strategy_ty as super::strategy::Strategy>::USE_HASH3;
2284 debug_assert!(!$self.table.hash_table.is_empty());
2285 debug_assert!($self.table.hash3_log == 0 || !$self.table.hash3_table.is_empty());
2286 debug_assert!(
2287 !use_hash3 || $self.table.hash3_log != 0,
2288 "Strategy::USE_HASH3 = true but runtime hash3_log is 0 — call configure() first",
2289 );
2290 debug_assert!(!$self.table.chain_table.is_empty());
2291 let min_match_len = HC_OPT_MIN_MATCH_LEN;
2292 let reps = $query.reps;
2293 let lit_len = $query.lit_len;
2294 let ldm_candidate = $query.ldm_candidate;
2295 $out.clear();
2296 if $abs_pos < $self.table.skip_insert_until_abs {
2297 if let Some(ldm) = ldm_candidate {
2298 let mut best_len_for_skip = 0usize;
2299 let _ = super::bt::BtMatcher::push_candidate_ladder(
2300 $out,
2301 &mut best_len_for_skip,
2302 ldm,
2303 min_match_len,
2304 );
2305 }
2306 return;
2307 }
2308 if $bt_matchfinder {
2309 unsafe { $self.table.$bt_update($abs_pos, $current_abs_end) };
2312 }
2313 let current_idx = $abs_pos - $self.table.history_abs_start;
2314 if current_idx + 4 > $self.table.live_history().len() {
2315 if let Some(ldm) = ldm_candidate {
2316 let mut best_len_for_skip = 0usize;
2317 let _ = super::bt::BtMatcher::push_candidate_ladder(
2318 $out,
2319 &mut best_len_for_skip,
2320 ldm,
2321 min_match_len,
2322 );
2323 }
2324 return;
2325 }
2326 let mut best_len_for_skip = 0usize;
2327 let mut skip_further_match_search = false;
2328 let mut rep_len_candidate_found = false;
2329 unsafe {
2331 $self.hc.$for_each_rep(
2332 &$self.table,
2333 $abs_pos,
2334 lit_len,
2335 reps,
2336 $current_abs_end,
2337 min_match_len,
2338 |rep| {
2339 if rep.match_len >= min_match_len {
2340 rep_len_candidate_found = true;
2341 }
2342 let _ = super::bt::BtMatcher::push_candidate_ladder(
2343 $out,
2344 &mut best_len_for_skip,
2345 rep,
2346 min_match_len,
2347 );
2348 if rep.match_len > $profile.sufficient_match_len {
2349 skip_further_match_search = true;
2350 }
2351 if $abs_pos + rep.match_len >= $current_abs_end {
2358 skip_further_match_search = true;
2359 }
2360 },
2361 )
2362 };
2363 if use_hash3 && !skip_further_match_search && best_len_for_skip < min_match_len {
2367 $self.table.update_hash3_until($abs_pos);
2368 if let Some(h3) = unsafe {
2370 $self
2371 .table
2372 .$hash3($abs_pos, $current_abs_end, min_match_len)
2373 } {
2374 let _ = super::bt::BtMatcher::push_candidate_ladder(
2375 $out,
2376 &mut best_len_for_skip,
2377 h3,
2378 min_match_len,
2379 );
2380 if !rep_len_candidate_found
2381 && (h3.match_len > $profile.sufficient_match_len
2382 || $abs_pos + h3.match_len >= $current_abs_end)
2383 {
2384 $self.table.skip_insert_until_abs = $abs_pos + 1;
2385 skip_further_match_search = true;
2386 }
2387 }
2388 }
2389 if !skip_further_match_search && $bt_matchfinder {
2390 unsafe {
2392 $self.table.$bt_insert(
2393 $abs_pos,
2394 $current_abs_end,
2395 $profile,
2396 min_match_len,
2397 &mut best_len_for_skip,
2398 $out,
2399 )
2400 };
2401 } else if !skip_further_match_search {
2402 $self.table.insert_position($abs_pos);
2403 let max_chain_depth = $profile.max_chain_depth.min($self.hc.search_depth);
2404 let concat = &$self.table.history[$self.table.history_start..];
2405 let mut match_end_abs = $abs_pos + 9;
2409 if max_chain_depth > 0 {
2410 for (visited, candidate_abs) in $self
2411 .hc
2412 .chain_candidates(&$self.table, $abs_pos)
2413 .into_iter()
2414 .enumerate()
2415 {
2416 if visited >= max_chain_depth {
2417 break;
2418 }
2419 if candidate_abs == usize::MAX {
2420 break;
2421 }
2422 if candidate_abs < $self.table.history_abs_start || candidate_abs >= $abs_pos {
2423 continue;
2424 }
2425 let candidate_idx = candidate_abs - $self.table.history_abs_start;
2426 debug_assert!(
2427 $abs_pos <= $current_abs_end,
2428 "HC chain walker called past current block end"
2429 );
2430 let tail_limit = $current_abs_end - $abs_pos;
2431 let base = concat.as_ptr();
2432 let match_len =
2437 unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), tail_limit) };
2438 if match_len < min_match_len {
2439 continue;
2440 }
2441 let offset = $abs_pos - candidate_abs;
2442 if super::bt::BtMatcher::push_candidate_ladder(
2443 $out,
2444 &mut best_len_for_skip,
2445 MatchCandidate {
2446 start: $abs_pos,
2447 offset,
2448 match_len,
2449 },
2450 min_match_len,
2451 ) {
2452 let candidate_end = candidate_abs + match_len;
2453 if candidate_end > match_end_abs {
2454 match_end_abs = candidate_end;
2455 }
2456 }
2457 if match_len > HC_OPT_NUM || $abs_pos + match_len >= $current_abs_end {
2458 break;
2459 }
2460 }
2461 }
2462 $self.table.skip_insert_until_abs =
2465 $self.table.skip_insert_until_abs.max(match_end_abs - 8);
2466 }
2467 if let Some(ldm) = ldm_candidate {
2468 let _ = super::bt::BtMatcher::push_candidate_ladder(
2469 $out,
2470 &mut best_len_for_skip,
2471 ldm,
2472 min_match_len,
2473 );
2474 }
2475 }};
2476}
2477
2478macro_rules! hash3_candidate_body {
2483 (
2484 $table:expr,
2485 $abs_pos:ident,
2486 $current_abs_end:ident,
2487 $min_match_len:ident,
2488 $cpl:path $(,)?
2489 ) => {{
2490 if $table.hash3_log == 0 {
2491 return None;
2492 }
2493 let idx = $abs_pos.checked_sub($table.history_abs_start)?;
2494 let concat = $table.live_history();
2495 if idx + 4 > concat.len() {
2496 return None;
2497 }
2498 let hash3 = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2499 concat,
2500 idx,
2501 $table.hash3_log,
2502 3,
2503 );
2504 let entry = $table
2505 .hash3_table
2506 .get(hash3)
2507 .copied()
2508 .unwrap_or($crate::encoding::match_table::storage::HC_EMPTY);
2509 let candidate_abs =
2510 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2511 entry,
2512 $table.position_base,
2513 $table.index_shift,
2514 )?;
2515 if candidate_abs < $table.history_abs_start || candidate_abs >= $abs_pos {
2516 return None;
2517 }
2518 let offset = $abs_pos - candidate_abs;
2519 if offset >= $crate::encoding::bt::HC3_MAX_OFFSET {
2520 return None;
2521 }
2522 let candidate_idx = candidate_abs - $table.history_abs_start;
2523 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2524 let base = concat.as_ptr();
2525 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(idx), tail_limit) };
2528 (match_len >= $min_match_len).then_some($crate::encoding::opt::types::MatchCandidate {
2529 start: $abs_pos,
2530 offset,
2531 match_len,
2532 })
2533 }};
2534}
2535pub(crate) use hash3_candidate_body;
2536
2537macro_rules! for_each_repcode_candidate_body {
2547 (
2548 $table:expr,
2549 $abs_pos:ident,
2550 $lit_len:ident,
2551 $reps:ident,
2552 $current_abs_end:ident,
2553 $min_match_len:ident,
2554 $f:ident,
2555 $cpl:path $(,)?
2556 ) => {{
2557 let rep_offsets: [Option<usize>; 3] = if $lit_len == 0 {
2558 [
2559 Some($reps[1] as usize),
2560 Some($reps[2] as usize),
2561 ($reps[0] > 1).then_some(($reps[0] - 1) as usize),
2562 ]
2563 } else {
2564 [
2565 Some($reps[0] as usize),
2566 Some($reps[1] as usize),
2567 Some($reps[2] as usize),
2568 ]
2569 };
2570 let concat = $table.live_history();
2571 let current_idx = $abs_pos - $table.history_abs_start;
2572 if current_idx + 4 > concat.len() {
2573 return;
2574 }
2575 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2576 let base = concat.as_ptr();
2577 let concat_len = concat.len();
2578 for rep in rep_offsets.into_iter().flatten() {
2579 if rep == 0 || rep > $abs_pos {
2580 continue;
2581 }
2582 let candidate_pos = $abs_pos - rep;
2583 if candidate_pos < $table.history_abs_start {
2584 continue;
2585 }
2586 let candidate_idx = candidate_pos - $table.history_abs_start;
2587 let max = (concat_len - candidate_idx)
2592 .min(concat_len - current_idx)
2593 .min(tail_limit);
2594 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), max) };
2595 if match_len < $min_match_len {
2596 continue;
2597 }
2598 $f(MatchCandidate {
2599 start: $abs_pos,
2600 offset: rep,
2601 match_len,
2602 });
2603 }
2604 }};
2605}
2606pub(crate) use for_each_repcode_candidate_body;
2607
2608macro_rules! bt_insert_and_collect_matches_body {
2615 (
2616 $table:expr,
2617 $search_depth:expr,
2618 $abs_pos:ident,
2619 $current_abs_end:ident,
2620 $profile:ident,
2621 $min_match_len:ident,
2622 $best_len_for_skip:ident,
2623 $out:ident,
2624 $cmf:path $(,)?
2625 ) => {{
2626 let idx = $abs_pos - $table.history_abs_start;
2627 let concat = &$table.history[$table.history_start..];
2628 if idx + 8 > concat.len() {
2629 return;
2630 }
2631 debug_assert!(
2632 $abs_pos <= $current_abs_end,
2633 "BT collect called past current block end"
2634 );
2635 let tail_limit = $current_abs_end - $abs_pos;
2636 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2637 concat,
2638 idx,
2639 $table.hash_log,
2640 $crate::encoding::bt::BtMatcher::HASH_MLS,
2641 );
2642 let Some(relative_pos) = $table.relative_position($abs_pos) else {
2643 return;
2644 };
2645 let stored = relative_pos + 1;
2646 let bt_mask = $table.bt_mask();
2647 let bt_low = $abs_pos.saturating_sub(bt_mask);
2650 let window_low = $table.window_low_abs_for_target($abs_pos);
2651 let mut match_end_abs = $abs_pos + 9;
2655 let mut compares_left = $profile.max_chain_depth.min($search_depth);
2656 let mut common_length_smaller = 0usize;
2657 let mut common_length_larger = 0usize;
2658 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
2659 let mut smaller_slot = pair_idx;
2660 let mut larger_slot = pair_idx + 1;
2661 let mut match_stored = $table.hash_table[hash];
2662 $table.hash_table[hash] = stored;
2663 debug_assert!(
2668 $min_match_len >= $crate::encoding::cost_model::HC_FORMAT_MINMATCH,
2669 "min_match_len must be at least HC_FORMAT_MINMATCH"
2670 );
2671 let mut best_len = (*$best_len_for_skip).max($min_match_len - 1);
2672
2673 while compares_left > 0 {
2674 let Some(candidate_abs) =
2675 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2676 match_stored,
2677 $table.position_base,
2678 $table.index_shift,
2679 )
2680 else {
2681 break;
2682 };
2683 if candidate_abs < window_low || candidate_abs >= $abs_pos {
2684 break;
2685 }
2686 compares_left -= 1;
2687
2688 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
2689 let next_smaller = $table.chain_table[next_pair_idx];
2690 let next_larger = $table.chain_table[next_pair_idx + 1];
2691 let seed_len = common_length_smaller.min(common_length_larger);
2692 let candidate_idx = candidate_abs - $table.history_abs_start;
2693 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
2696
2697 if match_len > best_len {
2698 let offset = $abs_pos - candidate_abs;
2699 let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
2700 $out,
2701 $best_len_for_skip,
2702 $crate::encoding::opt::types::MatchCandidate {
2703 start: $abs_pos,
2704 offset,
2705 match_len,
2706 },
2707 $min_match_len,
2708 );
2709 if accepted {
2710 best_len = match_len;
2711 let candidate_end = candidate_abs + match_len;
2719 if candidate_end > match_end_abs {
2720 match_end_abs = candidate_end;
2721 }
2722 if match_len >= tail_limit
2723 || match_len > $crate::encoding::cost_model::HC_OPT_NUM
2724 {
2725 break;
2726 }
2727 }
2728 }
2729
2730 if match_len >= tail_limit {
2731 break;
2732 }
2733
2734 let candidate_next = candidate_idx + match_len;
2735 let current_next = idx + match_len;
2736 if concat[candidate_next] < concat[current_next] {
2737 $table.chain_table[smaller_slot] = match_stored;
2738 common_length_smaller = match_len;
2739 if candidate_abs <= bt_low {
2740 smaller_slot = usize::MAX;
2741 break;
2742 }
2743 smaller_slot = next_pair_idx + 1;
2744 match_stored = next_larger;
2745 } else {
2746 $table.chain_table[larger_slot] = match_stored;
2747 common_length_larger = match_len;
2748 if candidate_abs <= bt_low {
2749 larger_slot = usize::MAX;
2750 break;
2751 }
2752 larger_slot = next_pair_idx;
2753 match_stored = next_smaller;
2754 }
2755 }
2756
2757 if smaller_slot != usize::MAX {
2758 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2759 }
2760 if larger_slot != usize::MAX {
2761 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2762 }
2763 $table.skip_insert_until_abs = match_end_abs - 8;
2766 }};
2767}
2768pub(crate) use bt_insert_and_collect_matches_body;
2769
2770impl HcMatchGenerator {
2771 fn should_run_btultra2_seed_pass<S: super::strategy::Strategy>(
2772 &self,
2773 current_len: usize,
2774 ) -> bool {
2775 if !(S::OPT_LEVEL == 2 && S::USE_HASH3) {
2782 return false;
2783 }
2784 let HcBackend::Bt(bt) = &self.backend else {
2785 return false;
2786 };
2787 bt.opt_state.lit_length_sum == 0
2788 && bt.opt_state.dictionary_seed.is_none()
2789 && !self.table.dictionary_primed_for_frame
2790 && bt.ldm_sequences.is_empty()
2791 && self.table.window_size == current_len
2792 && self.table.history_abs_start == 0
2793 && self.table.window.len() == 1
2794 && current_len > HC_PREDEF_THRESHOLD
2795 }
2796
2797 fn new(max_window_size: usize) -> Self {
2798 Self {
2799 table: super::match_table::storage::MatchTable::new(max_window_size),
2800 hc: super::hc::HcMatcher::new(2, HC_SEARCH_DEPTH, HC_TARGET_LEN),
2801 backend: HcBackend::Hc,
2804 strategy_tag: super::strategy::StrategyTag::Lazy,
2811 }
2812 }
2813
2814 fn configure(&mut self, config: HcConfig, tag: super::strategy::StrategyTag, window_log: u8) {
2815 use super::strategy::StrategyTag;
2816 self.strategy_tag = tag;
2820 let is_btultra2 = tag == StrategyTag::BtUltra2;
2821 let uses_bt = matches!(
2822 tag,
2823 StrategyTag::BtOpt | StrategyTag::BtUltra | StrategyTag::BtUltra2
2824 );
2825 let next_hash3_log = if is_btultra2 {
2826 HC3_HASH_LOG.min(window_log as usize)
2827 } else {
2828 0
2829 };
2830 let resize = self.table.hash_log != config.hash_log
2831 || self.table.chain_log != config.chain_log
2832 || self.table.hash3_log != next_hash3_log;
2833 self.table.hash_log = config.hash_log;
2834 self.table.chain_log = config.chain_log;
2835 self.table.hash3_log = next_hash3_log;
2836 self.hc.search_depth = if uses_bt {
2837 config.search_depth
2838 } else {
2839 config.search_depth.min(MAX_HC_SEARCH_DEPTH)
2840 };
2841 self.hc.target_len = config.target_len;
2842 self.table.search_depth = self.hc.search_depth;
2846 self.table.is_btultra2 = is_btultra2;
2847 self.table.uses_bt = uses_bt;
2848 match (&self.backend, self.table.uses_bt) {
2852 (HcBackend::Hc, true) => {
2853 self.backend = HcBackend::Bt(alloc::boxed::Box::new(super::bt::BtMatcher::new()));
2854 }
2855 (HcBackend::Bt(_), false) => {
2856 self.backend = HcBackend::Hc;
2857 }
2858 _ => {}
2859 }
2860 if resize && !self.table.hash_table.is_empty() {
2861 self.table.hash_table.clear();
2863 self.table.hash3_table.clear();
2864 self.table.chain_table.clear();
2865 }
2866 }
2867
2868 fn seed_dictionary_entropy(
2869 &mut self,
2870 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
2871 ll: Option<&crate::fse::fse_encoder::FSETable>,
2872 ml: Option<&crate::fse::fse_encoder::FSETable>,
2873 of: Option<&crate::fse::fse_encoder::FSETable>,
2874 ) {
2875 if let HcBackend::Bt(bt) = &mut self.backend {
2876 bt.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
2877 }
2878 }
2879
2880 fn reset(&mut self, reuse_space: impl FnMut(Vec<u8>)) {
2881 self.table.reset(reuse_space);
2882 if let HcBackend::Bt(bt) = &mut self.backend {
2883 bt.reset();
2884 }
2885 }
2886
2887 fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
2890 self.table.skip_matching(incompressible_hint);
2891 }
2892
2893 #[cfg(test)]
2899 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2900 use super::strategy::{self, StrategyTag};
2901 match self.strategy_tag {
2907 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
2908 self.start_matching_lazy(&mut handle_sequence)
2909 }
2910 StrategyTag::BtOpt => {
2911 self.start_matching_optimal::<strategy::BtOpt>(&mut handle_sequence)
2912 }
2913 StrategyTag::BtUltra => {
2914 self.start_matching_optimal::<strategy::BtUltra>(&mut handle_sequence)
2915 }
2916 StrategyTag::BtUltra2 => {
2917 self.start_matching_optimal::<strategy::BtUltra2>(&mut handle_sequence)
2918 }
2919 }
2920 }
2921
2922 pub(crate) fn start_matching_strategy<S: super::strategy::Strategy>(
2933 &mut self,
2934 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
2935 ) {
2936 debug_assert_eq!(
2937 self.table.uses_bt,
2938 S::USE_BT,
2939 "Strategy::USE_BT disagrees with runtime table.uses_bt at HC dispatch"
2940 );
2941 if S::USE_BT {
2942 self.start_matching_optimal::<S>(handle_sequence)
2943 } else {
2944 self.start_matching_lazy(handle_sequence)
2945 }
2946 }
2947
2948 fn start_matching_lazy(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2949 self.table.ensure_tables();
2950
2951 let current_len = self.table.window.back().unwrap().len();
2952 if current_len == 0 {
2953 return;
2954 }
2955
2956 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
2957 let current_abs_end = current_abs_start + current_len;
2958 self.table
2959 .backfill_boundary_positions(current_abs_start, current_abs_end);
2960
2961 let mut pos = 0usize;
2962 let mut literals_start = 0usize;
2963 while pos + HC_MIN_MATCH_LEN <= current_len {
2964 let abs_pos = current_abs_start + pos;
2965 let lit_len = pos - literals_start;
2966
2967 let best = self.hc.find_best_match(&self.table, abs_pos, lit_len);
2968 if let Some(candidate) = self.hc.pick_lazy_match(&self.table, abs_pos, lit_len, best) {
2969 self.table
2970 .insert_positions(abs_pos, candidate.start + candidate.match_len);
2971 let current = self.table.window.back().unwrap().as_slice();
2972 let start = candidate.start - current_abs_start;
2973 let literals = ¤t[literals_start..start];
2974 handle_sequence(Sequence::Triple {
2975 literals,
2976 offset: candidate.offset,
2977 match_len: candidate.match_len,
2978 });
2979 let _ = encode_offset_with_history(
2980 candidate.offset as u32,
2981 literals.len() as u32,
2982 &mut self.table.offset_hist,
2983 );
2984 pos = start + candidate.match_len;
2985 literals_start = pos;
2986 } else {
2987 self.table.insert_position(abs_pos);
2988 pos += 1;
2989 }
2990 }
2991
2992 while pos + 4 <= current_len {
2995 self.table.insert_position(current_abs_start + pos);
2996 pos += 1;
2997 }
2998
2999 if literals_start < current_len {
3000 let current = self.table.window.back().unwrap().as_slice();
3001 handle_sequence(Sequence::Literals {
3002 literals: ¤t[literals_start..],
3003 });
3004 }
3005 }
3006
3007 fn start_matching_optimal<S: super::strategy::Strategy>(
3008 &mut self,
3009 mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
3010 ) {
3011 self.table.ensure_tables();
3012 let current_len = self.table.window.back().unwrap().len();
3013 if current_len == 0 {
3014 return;
3015 }
3016 let current_ptr = self.table.window.back().unwrap().as_ptr();
3017 let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
3021
3022 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
3023 let current_abs_end = current_abs_start + current_len;
3024 self.table
3025 .apply_limited_update_after_long_match(current_abs_start);
3026 let hash3_start_cursor = self
3027 .table
3028 .skip_insert_until_abs
3029 .max(self.table.history_abs_start);
3030 self.table
3031 .backfill_boundary_positions(current_abs_start, current_abs_end);
3032 self.table.next_to_update3 = hash3_start_cursor;
3033 let live_history = self.table.live_history();
3048 let history_abs_start = self.table.history_abs_start;
3049 self.backend.bt_mut().prepare_ldm_candidates(
3050 live_history,
3051 history_abs_start,
3052 current_abs_start,
3053 current_len,
3054 );
3055
3056 if self.should_run_btultra2_seed_pass::<S>(current_len) {
3057 self.run_btultra2_seed_pass(current, current_abs_start, current_len);
3058 }
3059
3060 let profile = HcOptimalCostProfile::const_for_strategy::<S>();
3066 let mut opt_state =
3067 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3068 opt_state.rescale_freqs(current, profile);
3069 let mut best_plan = core::mem::take(&mut self.backend.bt_mut().opt_segment_plan_scratch);
3070 best_plan.clear();
3071 let mut plan_reps = self.table.offset_hist;
3072 let (mut cursor, mut plan_litlen) = self
3073 .table
3074 .donor_opt_start_cursor_and_litlen(current_abs_start);
3075 let mut plan_literals_cursor = 0usize;
3076 let match_loop_limit = current_len.saturating_sub(8);
3077 while cursor < match_loop_limit {
3078 let remaining_len = current_len - cursor;
3079 let segment_abs_start = current_abs_start + cursor;
3080 let segment_start = best_plan.len();
3081 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3082 ¤t[cursor..],
3083 segment_abs_start,
3084 remaining_len,
3085 HcOptimalPlanState {
3086 reps: plan_reps,
3087 litlen: plan_litlen,
3088 profile,
3089 },
3090 &opt_state,
3091 &mut best_plan,
3092 );
3093 BtMatcher::update_plan_stats_segment(
3094 current,
3095 current_len,
3096 &best_plan[segment_start..],
3097 &mut plan_literals_cursor,
3098 &mut plan_reps,
3099 &mut opt_state,
3100 profile.accurate,
3101 );
3102 plan_reps = end_reps;
3103 plan_litlen = end_litlen;
3104 cursor += consumed_len;
3105 }
3106
3107 self.table
3108 .emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
3109 best_plan.clear();
3110 self.backend.bt_mut().opt_segment_plan_scratch = best_plan;
3111 self.backend.bt_mut().opt_state = opt_state;
3112 }
3113
3114 fn run_btultra2_seed_pass(
3115 &mut self,
3116 current: &[u8],
3117 current_abs_start: usize,
3118 current_len: usize,
3119 ) {
3120 type S = super::strategy::BtUltra2;
3125 let seed_profile = HcOptimalCostProfile::const_for_strategy::<S>();
3126 let mut opt_state =
3127 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3128 opt_state.rescale_freqs(current, seed_profile);
3129 let mut seed_reps = self.table.offset_hist;
3130 let (mut cursor, mut seed_litlen) = self
3131 .table
3132 .donor_opt_start_cursor_and_litlen(current_abs_start);
3133 let mut seed_literals_cursor = 0usize;
3134 let mut seed_plan = core::mem::take(&mut self.backend.bt_mut().opt_seed_plan_scratch);
3135 seed_plan.clear();
3136 let match_loop_limit = current_len.saturating_sub(8);
3137 while cursor < match_loop_limit {
3138 let remaining_len = current_len - cursor;
3139 let segment_abs_start = current_abs_start + cursor;
3140 let segment_start = seed_plan.len();
3141 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3142 ¤t[cursor..],
3143 segment_abs_start,
3144 remaining_len,
3145 HcOptimalPlanState {
3146 reps: seed_reps,
3147 litlen: seed_litlen,
3148 profile: seed_profile,
3149 },
3150 &opt_state,
3151 &mut seed_plan,
3152 );
3153 BtMatcher::update_plan_stats_segment(
3154 current,
3155 current_len,
3156 &seed_plan[segment_start..],
3157 &mut seed_literals_cursor,
3158 &mut seed_reps,
3159 &mut opt_state,
3160 seed_profile.accurate,
3161 );
3162 seed_plan.truncate(segment_start);
3163 seed_reps = end_reps;
3164 seed_litlen = end_litlen;
3165 cursor += consumed_len;
3166 }
3167 seed_plan.clear();
3168 self.backend.bt_mut().opt_seed_plan_scratch = seed_plan;
3169 self.backend.bt_mut().opt_state = opt_state;
3170
3171 self.table.position_base = self.table.history_abs_start;
3174 self.table.index_shift = current_len;
3175 self.table.next_to_update3 = current_abs_start;
3176 self.table.skip_insert_until_abs = current_abs_start;
3177 self.table.allow_zero_relative_position = true;
3183 }
3184
3185 fn build_optimal_plan<S: super::strategy::Strategy>(
3186 &mut self,
3187 current: &[u8],
3188 current_abs_start: usize,
3189 current_len: usize,
3190 initial_state: HcOptimalPlanState,
3191 stats: &HcOptState,
3192 out: &mut Vec<HcOptimalSequence>,
3193 ) -> (u32, [u32; 3], usize, usize) {
3194 debug_assert!(S::USE_BT, "build_optimal_plan called on non-BT strategy");
3195 debug_assert_eq!(initial_state.profile.accurate, S::ACCURATE_PRICE);
3196 debug_assert_eq!(
3197 initial_state.profile.favor_small_offsets,
3198 S::FAVOR_SMALL_OFFSETS
3199 );
3200 let profile = initial_state.profile;
3207 match (profile.accurate, profile.favor_small_offsets) {
3208 (true, false) => self.build_optimal_plan_impl::<S, true, false>(
3209 current,
3210 current_abs_start,
3211 current_len,
3212 initial_state,
3213 stats,
3214 out,
3215 ),
3216 (true, true) => self.build_optimal_plan_impl::<S, true, true>(
3217 current,
3218 current_abs_start,
3219 current_len,
3220 initial_state,
3221 stats,
3222 out,
3223 ),
3224 (false, false) => self.build_optimal_plan_impl::<S, false, false>(
3225 current,
3226 current_abs_start,
3227 current_len,
3228 initial_state,
3229 stats,
3230 out,
3231 ),
3232 (false, true) => self.build_optimal_plan_impl::<S, false, true>(
3233 current,
3234 current_abs_start,
3235 current_len,
3236 initial_state,
3237 stats,
3238 out,
3239 ),
3240 }
3241 }
3242
3243 #[inline(always)]
3252 fn build_optimal_plan_impl<
3253 S: super::strategy::Strategy,
3254 const ACCURATE_PRICE: bool,
3255 const FAVOR_SMALL_OFFSETS: bool,
3256 >(
3257 &mut self,
3258 current: &[u8],
3259 current_abs_start: usize,
3260 current_len: usize,
3261 initial_state: HcOptimalPlanState,
3262 stats: &HcOptState,
3263 out: &mut Vec<HcOptimalSequence>,
3264 ) -> (u32, [u32; 3], usize, usize) {
3265 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3266 unsafe {
3267 self.build_optimal_plan_impl_neon::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3268 current,
3269 current_abs_start,
3270 current_len,
3271 initial_state,
3272 stats,
3273 out,
3274 )
3275 }
3276 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3277 {
3278 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3279 match select_kernel() {
3280 FastpathKernel::Avx2Bmi2 => unsafe {
3281 self.build_optimal_plan_impl_avx2_bmi2::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3282 current,
3283 current_abs_start,
3284 current_len,
3285 initial_state,
3286 stats,
3287 out,
3288 )
3289 },
3290 FastpathKernel::Sse42 => unsafe {
3291 self.build_optimal_plan_impl_sse42::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3292 current,
3293 current_abs_start,
3294 current_len,
3295 initial_state,
3296 stats,
3297 out,
3298 )
3299 },
3300 FastpathKernel::Scalar => self
3301 .build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3302 current,
3303 current_abs_start,
3304 current_len,
3305 initial_state,
3306 stats,
3307 out,
3308 ),
3309 }
3310 }
3311 #[cfg(not(any(
3312 all(target_arch = "aarch64", target_endian = "little"),
3313 target_arch = "x86",
3314 target_arch = "x86_64"
3315 )))]
3316 {
3317 self.build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3318 current,
3319 current_abs_start,
3320 current_len,
3321 initial_state,
3322 stats,
3323 out,
3324 )
3325 }
3326 }
3327
3328 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3332 #[target_feature(enable = "neon")]
3333 unsafe fn build_optimal_plan_impl_neon<
3334 S: super::strategy::Strategy,
3335 const ACCURATE_PRICE: bool,
3336 const FAVOR_SMALL_OFFSETS: bool,
3337 >(
3338 &mut self,
3339 current: &[u8],
3340 current_abs_start: usize,
3341 current_len: usize,
3342 initial_state: HcOptimalPlanState,
3343 stats: &HcOptState,
3344 out: &mut Vec<HcOptimalSequence>,
3345 ) -> (u32, [u32; 3], usize, usize) {
3346 build_optimal_plan_impl_body!(
3347 self,
3348 S,
3349 current,
3350 current_abs_start,
3351 current_len,
3352 initial_state,
3353 stats,
3354 out,
3355 collect_optimal_candidates_initialized_neon,
3356 )
3357 }
3358
3359 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3360 #[target_feature(enable = "sse4.2")]
3361 unsafe fn build_optimal_plan_impl_sse42<
3362 S: super::strategy::Strategy,
3363 const ACCURATE_PRICE: bool,
3364 const FAVOR_SMALL_OFFSETS: bool,
3365 >(
3366 &mut self,
3367 current: &[u8],
3368 current_abs_start: usize,
3369 current_len: usize,
3370 initial_state: HcOptimalPlanState,
3371 stats: &HcOptState,
3372 out: &mut Vec<HcOptimalSequence>,
3373 ) -> (u32, [u32; 3], usize, usize) {
3374 build_optimal_plan_impl_body!(
3375 self,
3376 S,
3377 current,
3378 current_abs_start,
3379 current_len,
3380 initial_state,
3381 stats,
3382 out,
3383 collect_optimal_candidates_initialized_sse42,
3384 )
3385 }
3386
3387 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3388 #[target_feature(enable = "avx2,bmi2")]
3389 unsafe fn build_optimal_plan_impl_avx2_bmi2<
3390 S: super::strategy::Strategy,
3391 const ACCURATE_PRICE: bool,
3392 const FAVOR_SMALL_OFFSETS: bool,
3393 >(
3394 &mut self,
3395 current: &[u8],
3396 current_abs_start: usize,
3397 current_len: usize,
3398 initial_state: HcOptimalPlanState,
3399 stats: &HcOptState,
3400 out: &mut Vec<HcOptimalSequence>,
3401 ) -> (u32, [u32; 3], usize, usize) {
3402 build_optimal_plan_impl_body!(
3403 self,
3404 S,
3405 current,
3406 current_abs_start,
3407 current_len,
3408 initial_state,
3409 stats,
3410 out,
3411 collect_optimal_candidates_initialized_avx2_bmi2,
3412 )
3413 }
3414
3415 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3416 #[allow(unused_unsafe)]
3420 fn build_optimal_plan_impl_scalar<
3421 S: super::strategy::Strategy,
3422 const ACCURATE_PRICE: bool,
3423 const FAVOR_SMALL_OFFSETS: bool,
3424 >(
3425 &mut self,
3426 current: &[u8],
3427 current_abs_start: usize,
3428 current_len: usize,
3429 initial_state: HcOptimalPlanState,
3430 stats: &HcOptState,
3431 out: &mut Vec<HcOptimalSequence>,
3432 ) -> (u32, [u32; 3], usize, usize) {
3433 build_optimal_plan_impl_body!(
3434 self,
3435 S,
3436 current,
3437 current_abs_start,
3438 current_len,
3439 initial_state,
3440 stats,
3441 out,
3442 collect_optimal_candidates_initialized_scalar,
3443 )
3444 }
3445
3446 #[cfg(test)]
3447 fn collect_optimal_candidates(
3448 &mut self,
3449 abs_pos: usize,
3450 current_abs_end: usize,
3451 profile: HcOptimalCostProfile,
3452 query: HcCandidateQuery,
3453 out: &mut Vec<MatchCandidate>,
3454 ) {
3455 use super::strategy::{self, StrategyTag};
3456 self.table.ensure_tables();
3457 match self.strategy_tag {
3463 StrategyTag::BtUltra2 => self
3464 .collect_optimal_candidates_initialized::<strategy::BtUltra2, true>(
3465 abs_pos,
3466 current_abs_end,
3467 profile,
3468 query,
3469 out,
3470 ),
3471 StrategyTag::BtUltra => self
3472 .collect_optimal_candidates_initialized::<strategy::BtUltra, true>(
3473 abs_pos,
3474 current_abs_end,
3475 profile,
3476 query,
3477 out,
3478 ),
3479 StrategyTag::BtOpt => self
3480 .collect_optimal_candidates_initialized::<strategy::BtOpt, true>(
3481 abs_pos,
3482 current_abs_end,
3483 profile,
3484 query,
3485 out,
3486 ),
3487 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
3488 self.collect_optimal_candidates_initialized::<strategy::Lazy, false>(
3489 abs_pos,
3490 current_abs_end,
3491 profile,
3492 query,
3493 out,
3494 )
3495 }
3496 }
3497 }
3498
3499 #[allow(dead_code)]
3509 #[inline(always)]
3510 fn collect_optimal_candidates_initialized<
3511 S: super::strategy::Strategy,
3512 const USE_BT_MATCHFINDER: bool,
3513 >(
3514 &mut self,
3515 abs_pos: usize,
3516 current_abs_end: usize,
3517 profile: HcOptimalCostProfile,
3518 query: HcCandidateQuery,
3519 out: &mut Vec<MatchCandidate>,
3520 ) {
3521 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3522 unsafe {
3523 self.collect_optimal_candidates_initialized_neon::<S, USE_BT_MATCHFINDER>(
3524 abs_pos,
3525 current_abs_end,
3526 profile,
3527 query,
3528 out,
3529 )
3530 }
3531 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3532 {
3533 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3534 match select_kernel() {
3535 FastpathKernel::Avx2Bmi2 => unsafe {
3536 self.collect_optimal_candidates_initialized_avx2_bmi2::<S, USE_BT_MATCHFINDER>(
3537 abs_pos,
3538 current_abs_end,
3539 profile,
3540 query,
3541 out,
3542 )
3543 },
3544 FastpathKernel::Sse42 => unsafe {
3545 self.collect_optimal_candidates_initialized_sse42::<S, USE_BT_MATCHFINDER>(
3546 abs_pos,
3547 current_abs_end,
3548 profile,
3549 query,
3550 out,
3551 )
3552 },
3553 FastpathKernel::Scalar => self
3554 .collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3555 abs_pos,
3556 current_abs_end,
3557 profile,
3558 query,
3559 out,
3560 ),
3561 }
3562 }
3563 #[cfg(not(any(
3564 all(target_arch = "aarch64", target_endian = "little"),
3565 target_arch = "x86",
3566 target_arch = "x86_64"
3567 )))]
3568 {
3569 self.collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3570 abs_pos,
3571 current_abs_end,
3572 profile,
3573 query,
3574 out,
3575 )
3576 }
3577 }
3578
3579 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3585 #[target_feature(enable = "neon")]
3586 unsafe fn collect_optimal_candidates_initialized_neon<
3587 S: super::strategy::Strategy,
3588 const USE_BT_MATCHFINDER: bool,
3589 >(
3590 &mut self,
3591 abs_pos: usize,
3592 current_abs_end: usize,
3593 profile: HcOptimalCostProfile,
3594 query: HcCandidateQuery,
3595 out: &mut Vec<MatchCandidate>,
3596 ) {
3597 collect_optimal_candidates_initialized_body!(
3598 self,
3599 S,
3600 abs_pos,
3601 current_abs_end,
3602 profile,
3603 query,
3604 out,
3605 USE_BT_MATCHFINDER,
3606 bt_update_tree_until_neon,
3607 bt_insert_and_collect_matches_neon,
3608 for_each_repcode_candidate_with_reps_neon,
3609 hash3_candidate_neon,
3610 crate::encoding::fastpath::neon::common_prefix_len_ptr,
3611 )
3612 }
3613
3614 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3615 #[target_feature(enable = "sse4.2")]
3616 unsafe fn collect_optimal_candidates_initialized_sse42<
3617 S: super::strategy::Strategy,
3618 const USE_BT_MATCHFINDER: bool,
3619 >(
3620 &mut self,
3621 abs_pos: usize,
3622 current_abs_end: usize,
3623 profile: HcOptimalCostProfile,
3624 query: HcCandidateQuery,
3625 out: &mut Vec<MatchCandidate>,
3626 ) {
3627 collect_optimal_candidates_initialized_body!(
3628 self,
3629 S,
3630 abs_pos,
3631 current_abs_end,
3632 profile,
3633 query,
3634 out,
3635 USE_BT_MATCHFINDER,
3636 bt_update_tree_until_sse42,
3637 bt_insert_and_collect_matches_sse42,
3638 for_each_repcode_candidate_with_reps_sse42,
3639 hash3_candidate_sse42,
3640 crate::encoding::fastpath::sse42::common_prefix_len_ptr,
3641 )
3642 }
3643
3644 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3645 #[target_feature(enable = "avx2,bmi2")]
3646 unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<
3647 S: super::strategy::Strategy,
3648 const USE_BT_MATCHFINDER: bool,
3649 >(
3650 &mut self,
3651 abs_pos: usize,
3652 current_abs_end: usize,
3653 profile: HcOptimalCostProfile,
3654 query: HcCandidateQuery,
3655 out: &mut Vec<MatchCandidate>,
3656 ) {
3657 collect_optimal_candidates_initialized_body!(
3658 self,
3659 S,
3660 abs_pos,
3661 current_abs_end,
3662 profile,
3663 query,
3664 out,
3665 USE_BT_MATCHFINDER,
3666 bt_update_tree_until_avx2_bmi2,
3667 bt_insert_and_collect_matches_avx2_bmi2,
3668 for_each_repcode_candidate_with_reps_avx2_bmi2,
3669 hash3_candidate_avx2_bmi2,
3670 crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
3671 )
3672 }
3673
3674 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3675 #[allow(unused_unsafe)]
3678 fn collect_optimal_candidates_initialized_scalar<
3679 S: super::strategy::Strategy,
3680 const USE_BT_MATCHFINDER: bool,
3681 >(
3682 &mut self,
3683 abs_pos: usize,
3684 current_abs_end: usize,
3685 profile: HcOptimalCostProfile,
3686 query: HcCandidateQuery,
3687 out: &mut Vec<MatchCandidate>,
3688 ) {
3689 collect_optimal_candidates_initialized_body!(
3690 self,
3691 S,
3692 abs_pos,
3693 current_abs_end,
3694 profile,
3695 query,
3696 out,
3697 USE_BT_MATCHFINDER,
3698 bt_update_tree_until_scalar,
3699 bt_insert_and_collect_matches_scalar,
3700 for_each_repcode_candidate_with_reps_scalar,
3701 hash3_candidate_scalar,
3702 crate::encoding::fastpath::scalar::common_prefix_len_ptr,
3703 )
3704 }
3705}
3706
3707#[test]
3708fn matches() {
3709 let mut matcher = MatchGenerator::new(1000);
3710 let mut original_data = Vec::new();
3711 let mut reconstructed = Vec::new();
3712
3713 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
3714 Sequence::Literals { literals } => {
3715 assert!(!literals.is_empty());
3716 reconstructed.extend_from_slice(literals);
3717 }
3718 Sequence::Triple {
3719 literals,
3720 offset,
3721 match_len,
3722 } => {
3723 assert!(offset > 0);
3724 assert!(match_len >= MIN_MATCH_LEN);
3725 reconstructed.extend_from_slice(literals);
3726 assert!(offset <= reconstructed.len());
3727 let start = reconstructed.len() - offset;
3728 for i in 0..match_len {
3729 let byte = reconstructed[start + i];
3730 reconstructed.push(byte);
3731 }
3732 }
3733 };
3734
3735 matcher.add_data(
3736 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
3737 SuffixStore::with_capacity(100),
3738 |_, _| {},
3739 );
3740 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
3741
3742 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3743
3744 assert!(!matcher.next_sequence(|_| {}));
3745
3746 matcher.add_data(
3747 alloc::vec![
3748 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3749 ],
3750 SuffixStore::with_capacity(100),
3751 |_, _| {},
3752 );
3753 original_data.extend_from_slice(&[
3754 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3755 ]);
3756
3757 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3758 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3759 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3760 assert!(!matcher.next_sequence(|_| {}));
3761
3762 matcher.add_data(
3763 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
3764 SuffixStore::with_capacity(100),
3765 |_, _| {},
3766 );
3767 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
3768
3769 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3770 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3771 assert!(!matcher.next_sequence(|_| {}));
3772
3773 matcher.add_data(
3774 alloc::vec![0, 0, 0, 0, 0],
3775 SuffixStore::with_capacity(100),
3776 |_, _| {},
3777 );
3778 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
3779
3780 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3781 assert!(!matcher.next_sequence(|_| {}));
3782
3783 matcher.add_data(
3784 alloc::vec![7, 8, 9, 10, 11],
3785 SuffixStore::with_capacity(100),
3786 |_, _| {},
3787 );
3788 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
3789
3790 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3791 assert!(!matcher.next_sequence(|_| {}));
3792
3793 matcher.add_data(
3794 alloc::vec![1, 3, 5, 7, 9],
3795 SuffixStore::with_capacity(100),
3796 |_, _| {},
3797 );
3798 matcher.skip_matching();
3799 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
3800 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
3801 assert!(!matcher.next_sequence(|_| {}));
3802
3803 matcher.add_data(
3804 alloc::vec![1, 3, 5, 7, 9],
3805 SuffixStore::with_capacity(100),
3806 |_, _| {},
3807 );
3808 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
3809
3810 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3811 assert!(!matcher.next_sequence(|_| {}));
3812
3813 matcher.add_data(
3814 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
3815 SuffixStore::with_capacity(100),
3816 |_, _| {},
3817 );
3818 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
3819
3820 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3821 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3822 assert!(!matcher.next_sequence(|_| {}));
3823
3824 assert_eq!(reconstructed, original_data);
3825}
3826
3827#[test]
3828fn dfast_matches_roundtrip_multi_block_pattern() {
3829 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
3830 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
3831 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
3832
3833 let mut matcher = DfastMatchGenerator::new(1 << 22);
3834 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
3835 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
3836 Sequence::Triple {
3837 literals,
3838 offset,
3839 match_len,
3840 } => {
3841 decoded.extend_from_slice(literals);
3842 let start = decoded.len() - offset;
3843 for i in 0..match_len {
3844 let byte = decoded[start + i];
3845 decoded.push(byte);
3846 }
3847 }
3848 };
3849
3850 matcher.add_data(first_block.clone(), |_| {});
3851 let mut history = Vec::new();
3852 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3853 assert_eq!(history, first_block);
3854
3855 matcher.add_data(second_block.clone(), |_| {});
3856 let prefix_len = history.len();
3857 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3858
3859 assert_eq!(&history[prefix_len..], second_block.as_slice());
3860}
3861
3862#[test]
3879fn dfast_accepts_exact_five_byte_match() {
3880 let mut data = Vec::new();
3889 data.extend_from_slice(b"ABCDE"); data.extend_from_slice(b"!!!!!!!!!!!!!!!!!!!!!!!"); data.extend_from_slice(b"ABCDE"); data.push(b'F'); assert_eq!(data.len(), 34);
3894
3895 let mut matcher = DfastMatchGenerator::new(1 << 22);
3896 matcher.add_data(data.clone(), |_| {});
3897
3898 let mut saw_five_byte_match = false;
3899 let mut saw_longer_match = false;
3900 matcher.start_matching(|seq| {
3901 if let Sequence::Triple {
3902 offset, match_len, ..
3903 } = seq
3904 {
3905 if offset == 28 && match_len == 5 {
3906 saw_five_byte_match = true;
3907 } else if offset == 28 && match_len > 5 {
3908 saw_longer_match = true;
3909 }
3910 }
3911 });
3912
3913 assert!(
3914 saw_five_byte_match,
3915 "dfast must accept the exact-5-byte match — a 6-byte floor would skip it"
3916 );
3917 assert!(
3918 !saw_longer_match,
3919 "fixture pinned to length 5 — byte 33 ('F') must terminate the extension"
3920 );
3921}
3922
3923#[test]
3924fn driver_switches_backends_and_initializes_dfast_via_reset() {
3925 let mut driver = MatchGeneratorDriver::new(32, 2);
3926
3927 driver.reset(CompressionLevel::Default);
3928 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Dfast);
3929 assert_eq!(driver.window_size(), (1u64 << 22));
3930
3931 let mut first = driver.get_next_space();
3932 first[..12].copy_from_slice(b"abcabcabcabc");
3933 first.truncate(12);
3934 driver.commit_space(first);
3935 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
3936 driver.skip_matching_with_hint(None);
3937
3938 let mut second = driver.get_next_space();
3939 second[..12].copy_from_slice(b"abcabcabcabc");
3940 second.truncate(12);
3941 driver.commit_space(second);
3942
3943 let mut reconstructed = b"abcabcabcabc".to_vec();
3944 driver.start_matching(|seq| match seq {
3945 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
3946 Sequence::Triple {
3947 literals,
3948 offset,
3949 match_len,
3950 } => {
3951 reconstructed.extend_from_slice(literals);
3952 let start = reconstructed.len() - offset;
3953 for i in 0..match_len {
3954 let byte = reconstructed[start + i];
3955 reconstructed.push(byte);
3956 }
3957 }
3958 });
3959 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
3960
3961 driver.reset(CompressionLevel::Fastest);
3962 assert_eq!(driver.window_size(), (1u64 << 17));
3963}
3964
3965#[test]
3966fn driver_level4_selects_row_backend() {
3967 let mut driver = MatchGeneratorDriver::new(32, 2);
3968 driver.reset(CompressionLevel::Level(4));
3969 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
3970}
3971
3972#[test]
3973fn driver_reset_keeps_strategy_tag_in_sync_with_active_backend() {
3974 use super::strategy::StrategyTag;
3975
3976 fn check(level: CompressionLevel, expected: StrategyTag) {
3977 let mut driver = MatchGeneratorDriver::new(32, 2);
3978 driver.reset(level);
3979 assert_eq!(
3980 driver.strategy_tag, expected,
3981 "strategy_tag wrong for {level:?}"
3982 );
3983 assert_eq!(
3984 driver.strategy_tag.backend(),
3985 driver.active_backend(),
3986 "strategy_tag backend disagrees with active_backend for {level:?}"
3987 );
3988 }
3989
3990 check(CompressionLevel::Level(1), StrategyTag::Fast);
3991 check(CompressionLevel::Level(2), StrategyTag::Dfast);
3992 check(CompressionLevel::Level(3), StrategyTag::Dfast);
3993 check(CompressionLevel::Level(4), StrategyTag::Greedy);
3994 check(CompressionLevel::Level(7), StrategyTag::Lazy);
3995 check(CompressionLevel::Level(15), StrategyTag::Lazy);
3996 check(CompressionLevel::Level(16), StrategyTag::BtOpt);
3997 check(CompressionLevel::Level(18), StrategyTag::BtUltra);
3998 check(CompressionLevel::Level(22), StrategyTag::BtUltra2);
3999 check(CompressionLevel::Fastest, StrategyTag::Fast);
4000 check(CompressionLevel::Default, StrategyTag::Dfast);
4001 check(CompressionLevel::Better, StrategyTag::Lazy);
4002 check(CompressionLevel::Best, StrategyTag::Lazy);
4003}
4004
4005#[test]
4006fn level_16_17_map_to_btopt_strategy() {
4007 use super::strategy::{BackendTag, StrategyTag};
4008 let p16 = resolve_level_params(CompressionLevel::Level(16), None);
4009 let p17 = resolve_level_params(CompressionLevel::Level(17), None);
4010 assert_eq!(p16.backend(), BackendTag::HashChain);
4011 assert_eq!(p17.backend(), BackendTag::HashChain);
4012 assert_eq!(StrategyTag::for_level(16), StrategyTag::BtOpt);
4013 assert_eq!(StrategyTag::for_level(17), StrategyTag::BtOpt);
4014}
4015
4016#[test]
4017fn level_18_19_map_to_btultra_strategy() {
4018 use super::strategy::{BackendTag, StrategyTag};
4019 let p18 = resolve_level_params(CompressionLevel::Level(18), None);
4020 let p19 = resolve_level_params(CompressionLevel::Level(19), None);
4021 assert_eq!(p18.backend(), BackendTag::HashChain);
4022 assert_eq!(p19.backend(), BackendTag::HashChain);
4023 assert_eq!(StrategyTag::for_level(18), StrategyTag::BtUltra);
4024 assert_eq!(StrategyTag::for_level(19), StrategyTag::BtUltra);
4025}
4026
4027#[test]
4028fn level_20_22_map_to_btultra2_strategy() {
4029 use super::strategy::{BackendTag, StrategyTag};
4030 for level in 20..=22 {
4031 let params = resolve_level_params(CompressionLevel::Level(level), None);
4032 assert_eq!(params.backend(), BackendTag::HashChain);
4033 assert_eq!(StrategyTag::for_level(level as u8), StrategyTag::BtUltra2);
4034 }
4035}
4036
4037#[test]
4038fn level22_uses_donor_target_length_and_large_input_tables() {
4039 let params = resolve_level_params(CompressionLevel::Level(22), None);
4040 assert_eq!(params.window_log, 27);
4041 assert_eq!(params.hc.hash_log, 25);
4042 assert_eq!(params.hc.chain_log, 27);
4043 assert_eq!(params.hc.search_depth, 1 << 9);
4044 assert_eq!(params.hc.target_len, 999);
4045}
4046
4047#[test]
4048fn level22_source_size_hint_uses_donor_btultra2_tiers() {
4049 let p16k = resolve_level_params(CompressionLevel::Level(22), Some(16 * 1024));
4050 assert_eq!(p16k.window_log, 14);
4051 assert_eq!(p16k.hc.hash_log, 15);
4052 assert_eq!(p16k.hc.chain_log, 15);
4053 assert_eq!(p16k.hc.search_depth, 1 << 10);
4054 assert_eq!(p16k.hc.target_len, 999);
4055
4056 let p128k = resolve_level_params(CompressionLevel::Level(22), Some(128 * 1024));
4057 assert_eq!(p128k.window_log, 17);
4058 assert_eq!(p128k.hc.hash_log, 17);
4059 assert_eq!(p128k.hc.chain_log, 18);
4060 assert_eq!(p128k.hc.search_depth, 1 << 11);
4061 assert_eq!(p128k.hc.target_len, 999);
4062
4063 let p256k = resolve_level_params(CompressionLevel::Level(22), Some(256 * 1024));
4064 assert_eq!(p256k.window_log, 18);
4065 assert_eq!(p256k.hc.hash_log, 19);
4066 assert_eq!(p256k.hc.chain_log, 19);
4067 assert_eq!(p256k.hc.search_depth, 1 << 13);
4068 assert_eq!(p256k.hc.target_len, 999);
4069}
4070
4071#[test]
4072fn level22_small_source_size_hint_matches_donor_cparams() {
4073 use zstd::zstd_safe::zstd_sys;
4074
4075 let source_size = 15_027u64;
4076 let donor = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };
4077 let params = resolve_level_params(CompressionLevel::Level(22), Some(source_size));
4078
4079 assert_eq!(params.window_log as u32, donor.windowLog);
4080 assert_eq!(params.hc.chain_log as u32, donor.chainLog);
4081 assert_eq!(params.hc.hash_log as u32, donor.hashLog);
4082 assert_eq!(params.hc.search_depth as u32, 1u32 << donor.searchLog);
4083 assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, donor.minMatch);
4084 assert_eq!(params.hc.target_len as u32, donor.targetLength);
4085}
4086
4087#[test]
4088fn level22_small_source_uses_window_bounded_hash3_log() {
4089 let mut hc = HcMatchGenerator::new(1 << 14);
4090 hc.configure(
4091 BTULTRA2_HC_CONFIG_L22_16K,
4092 super::strategy::StrategyTag::BtUltra2,
4093 14,
4094 );
4095 assert_eq!(hc.table.hash3_log, 14);
4096
4097 hc.configure(
4098 BTULTRA2_HC_CONFIG_L22,
4099 super::strategy::StrategyTag::BtUltra2,
4100 27,
4101 );
4102 assert_eq!(hc.table.hash3_log, HC3_HASH_LOG);
4103}
4104
4105#[test]
4106fn btultra2_seed_pass_initializes_opt_state() {
4107 let mut hc = HcMatchGenerator::new(1 << 20);
4108 hc.configure(
4109 BTULTRA2_HC_CONFIG,
4110 super::strategy::StrategyTag::BtUltra2,
4111 26,
4112 );
4113 let data: Vec<u8> = (0..32 * 1024).map(|i| (i % 251) as u8).collect();
4114 hc.table.add_data(data, |_| {});
4115 hc.start_matching(|_| {});
4116 assert!(
4117 hc.backend.bt_mut().opt_state.lit_length_sum > 0,
4118 "btultra2 first block should seed non-zero sequence statistics"
4119 );
4120 assert!(
4121 hc.backend.bt_mut().opt_state.off_code_sum > 0,
4122 "btultra2 first block should seed offset-code statistics"
4123 );
4124}
4125
4126#[test]
4127fn btultra2_profile_disables_small_offset_handicap() {
4128 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4134 assert!(
4135 !profile.favor_small_offsets,
4136 "btultra2 should match donor opt2 offset pricing"
4137 );
4138 assert!(
4139 profile.accurate,
4140 "btultra2 should use donor opt2 accurate pricing"
4141 );
4142}
4143
4144#[test]
4145fn btultra_profile_keeps_donor_search_depth_budget() {
4146 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra>();
4147 assert_eq!(
4148 p.max_chain_depth, 32,
4149 "btultra should not cap chain depth below donor opt2 search budget"
4150 );
4151}
4152
4153#[test]
4154fn btopt_profile_keeps_donor_search_depth_budget() {
4155 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtOpt>();
4156 assert_eq!(
4157 p.max_chain_depth, 32,
4158 "btopt should not cap chain depth below donor btopt search budget"
4159 );
4160}
4161
4162#[test]
4163fn sufficient_match_len_is_clamped_by_target_len() {
4164 let mut hc = HcMatchGenerator::new(1 << 20);
4165 hc.configure(
4166 BTULTRA2_HC_CONFIG,
4167 super::strategy::StrategyTag::BtUltra2,
4168 26,
4169 );
4170 hc.hc.target_len = 13;
4171 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4172 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 13);
4173}
4174
4175#[test]
4176fn opt_modes_use_target_len_as_sufficient_len() {
4177 use super::strategy;
4178 let mut hc = HcMatchGenerator::new(1 << 20);
4179 hc.hc.target_len = 57;
4180 let profiles = [
4181 HcOptimalCostProfile::const_for_strategy::<strategy::BtOpt>(),
4182 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra>(),
4183 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra2>(),
4184 ];
4185 for profile in profiles {
4186 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 57);
4187 }
4188}
4189
4190#[test]
4191fn sufficient_match_len_is_capped_by_opt_num() {
4192 let mut hc = HcMatchGenerator::new(1 << 20);
4193 hc.hc.target_len = usize::MAX / 2;
4194 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4195 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), HC_OPT_NUM - 1);
4196}
4197
4198#[test]
4199#[allow(clippy::borrow_deref_ref)]
4200fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
4201 let mut hc = HcMatchGenerator::new(1 << 20);
4202 hc.configure(
4203 BTULTRA2_HC_CONFIG,
4204 super::strategy::StrategyTag::BtUltra2,
4205 26,
4206 );
4207
4208 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
4209 b"aaabbbbccccddddeeeeefffffgggg",
4210 );
4211 let ll = crate::fse::fse_encoder::default_ll_table();
4212 let ml = crate::fse::fse_encoder::default_ml_table();
4213 let of = crate::fse::fse_encoder::default_of_table();
4214 hc.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
4215
4216 hc.backend.bt_mut().opt_state.rescale_freqs(
4217 b"abcd",
4218 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4219 );
4220
4221 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4222 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4223 1, 1, 1, 1, 1, 1,
4224 ];
4225
4226 assert_ne!(
4227 hc.backend.bt_mut().opt_state.lit_length_freq,
4228 base_ll_freqs,
4229 "dictionary entropy should override fallback LL bootstrap frequencies"
4230 );
4231 assert!(
4232 hc.backend
4233 .bt_mut()
4234 .opt_state
4235 .match_length_freq
4236 .iter()
4237 .any(|&v| v != 1),
4238 "dictionary entropy should seed non-uniform ML frequencies"
4239 );
4240 assert_ne!(
4241 hc.backend.bt_mut().opt_state.off_code_freq[0],
4242 6,
4243 "dictionary entropy should override fallback OF bootstrap frequencies"
4244 );
4245}
4246
4247#[test]
4248#[allow(clippy::borrow_deref_ref)]
4249fn dictionary_fse_seed_applies_without_huffman_seed() {
4250 let mut hc = HcMatchGenerator::new(1 << 20);
4251 hc.configure(
4252 BTULTRA2_HC_CONFIG,
4253 super::strategy::StrategyTag::BtUltra2,
4254 26,
4255 );
4256
4257 let ll = crate::fse::fse_encoder::default_ll_table();
4258 let ml = crate::fse::fse_encoder::default_ml_table();
4259 let of = crate::fse::fse_encoder::default_of_table();
4260 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4261 hc.backend.bt_mut().opt_state.rescale_freqs(
4262 b"abcd",
4263 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4264 );
4265
4266 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4267 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4268 1, 1, 1, 1, 1, 1,
4269 ];
4270 assert_ne!(
4271 hc.backend.bt_mut().opt_state.lit_length_freq,
4272 base_ll_freqs,
4273 "FSE seed should still override LL bootstrap frequencies without huffman seed"
4274 );
4275 assert!(
4276 hc.backend
4277 .bt_mut()
4278 .opt_state
4279 .match_length_freq
4280 .iter()
4281 .any(|&v| v != 1),
4282 "FSE seed should still seed non-uniform ML frequencies"
4283 );
4284 assert_ne!(
4285 hc.backend.bt_mut().opt_state.off_code_freq[0],
4286 6,
4287 "FSE seed should still override OF bootstrap frequencies without huffman seed"
4288 );
4289}
4290
4291#[test]
4292#[allow(clippy::borrow_deref_ref)]
4293fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
4294 let mut hc = HcMatchGenerator::new(1 << 20);
4295 hc.configure(
4296 BTULTRA2_HC_CONFIG,
4297 super::strategy::StrategyTag::BtUltra2,
4298 26,
4299 );
4300
4301 let ll = crate::fse::fse_encoder::default_ll_table();
4302 let ml = crate::fse::fse_encoder::default_ml_table();
4303 let of = crate::fse::fse_encoder::default_of_table();
4304 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4305 hc.backend.bt_mut().opt_state.rescale_freqs(
4306 b"abc",
4307 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4308 );
4309 assert!(
4310 matches!(
4311 hc.backend.bt_mut().opt_state.price_type,
4312 HcOptPriceType::Dynamic
4313 ),
4314 "dictionary-seeded first block should stay in dynamic mode even for tiny src"
4315 );
4316}
4317
4318#[test]
4319fn lit_length_price_blocksize_max_costs_one_extra_bit() {
4320 let profile_predef = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4321 let mut stats_predef = HcOptState::new();
4322 stats_predef.price_type = HcOptPriceType::Predefined;
4323 let predef_max = profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX);
4324 let predef_prev =
4325 profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX.saturating_sub(1));
4326 assert_eq!(
4327 predef_max,
4328 predef_prev + HC_BITCOST_MULTIPLIER,
4329 "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4330 );
4331
4332 let profile_dyn = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4333 let mut stats_dyn = HcOptState::new();
4334 stats_dyn.price_type = HcOptPriceType::Dynamic;
4335 stats_dyn.lit_length_freq.fill(1);
4336 stats_dyn.lit_length_sum = (HC_MAX_LL + 1) as u32;
4337 stats_dyn.match_length_freq.fill(1);
4338 stats_dyn.match_length_sum = (HC_MAX_ML + 1) as u32;
4339 stats_dyn.off_code_freq.fill(1);
4340 stats_dyn.off_code_sum = (HC_MAX_OFF + 1) as u32;
4341 stats_dyn.lit_freq.fill(1);
4342 stats_dyn.lit_sum = (HC_MAX_LIT + 1) as u32;
4343 stats_dyn.set_base_prices(true);
4344 let dyn_max = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX);
4345 let dyn_prev = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX.saturating_sub(1));
4346 assert_eq!(
4347 dyn_max,
4348 dyn_prev + HC_BITCOST_MULTIPLIER,
4349 "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4350 );
4351}
4352
4353#[test]
4354#[allow(clippy::borrow_deref_ref)]
4355fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
4356 let mut hc = HcMatchGenerator::new(1 << 20);
4357 hc.configure(
4358 BTULTRA2_HC_CONFIG,
4359 super::strategy::StrategyTag::BtUltra2,
4360 26,
4361 );
4362 let ll = crate::fse::fse_encoder::default_ll_table();
4363 let ml = crate::fse::fse_encoder::default_ml_table();
4364 let of = crate::fse::fse_encoder::default_of_table();
4365 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4366 assert!(
4367 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
4368 "dictionary-seeded first block should skip btultra2 warmup pass"
4369 );
4370}
4371
4372#[test]
4373fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
4374 let mut hc = HcMatchGenerator::new(1 << 20);
4375 hc.configure(
4376 BTULTRA2_HC_CONFIG,
4377 super::strategy::StrategyTag::BtUltra2,
4378 26,
4379 );
4380 hc.table.history_abs_start = 17;
4381 hc.table.window.push_back(b"abcdefghijklmnop".to_vec());
4382 assert!(
4383 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 9),
4384 "btultra2 warmup must be first-block only (no prefix history)"
4385 );
4386}
4387
4388#[test]
4389fn btultra2_seed_pass_disabled_for_tiny_block() {
4390 let mut hc = HcMatchGenerator::new(1 << 20);
4391 hc.configure(
4392 BTULTRA2_HC_CONFIG,
4393 super::strategy::StrategyTag::BtUltra2,
4394 26,
4395 );
4396 assert!(
4397 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD),
4398 "btultra2 warmup should not run at or below predefined threshold"
4399 );
4400}
4401
4402#[test]
4403fn btultra2_seed_pass_disabled_after_stats_initialized() {
4404 let mut hc = HcMatchGenerator::new(1 << 20);
4405 hc.configure(
4406 BTULTRA2_HC_CONFIG,
4407 super::strategy::StrategyTag::BtUltra2,
4408 26,
4409 );
4410 hc.backend.bt_mut().opt_state.lit_length_sum = 1;
4411 assert!(
4412 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
4413 "btultra2 warmup should run only for first block before stats are initialized"
4414 );
4415}
4416
4417#[test]
4418fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
4419 let mut hc = HcMatchGenerator::new(1 << 20);
4420 hc.configure(
4421 BTULTRA2_HC_CONFIG,
4422 super::strategy::StrategyTag::BtUltra2,
4423 26,
4424 );
4425 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
4428 hc.table
4429 .window
4430 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 32]);
4431 assert!(
4432 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
4433 "btultra2 warmup must not run after frame start"
4434 );
4435}
4436
4437#[test]
4438fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
4439 let mut hc = HcMatchGenerator::new(1 << 20);
4440 hc.configure(
4441 BTULTRA2_HC_CONFIG,
4442 super::strategy::StrategyTag::BtUltra2,
4443 26,
4444 );
4445 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
4446 hc.table
4447 .window
4448 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 64]);
4449 hc.backend.bt_mut().ldm_sequences.push(HcRawSeq {
4450 lit_length: 8,
4451 offset: 16,
4452 match_length: 32,
4453 });
4454 assert!(
4455 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
4456 "btultra2 warmup must not run when LDM already produced sequences"
4457 );
4458}
4459
4460#[test]
4461fn literal_price_uses_eight_bits_when_literals_uncompressed() {
4462 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4463 let mut stats = HcOptState::new();
4464 stats.set_literals_compressed_for_tests(false);
4465 stats.price_type = HcOptPriceType::Predefined;
4466 assert_eq!(
4467 profile.literal_price(&stats, b'a'),
4468 8 * HC_BITCOST_MULTIPLIER,
4469 "uncompressed literals should cost 8 bits regardless of price mode"
4470 );
4471}
4472
4473#[test]
4474fn update_stats_skips_literal_frequencies_when_uncompressed() {
4475 let mut stats = HcOptState::new();
4476 stats.set_literals_compressed_for_tests(false);
4477 stats.update_stats(3, b"abc", 4, 8);
4478 assert_eq!(
4479 stats.lit_sum, 0,
4480 "literal sum must remain unchanged when literal compression is disabled"
4481 );
4482 assert_eq!(
4483 stats.lit_freq.iter().copied().sum::<u32>(),
4484 0,
4485 "literal frequencies must not be updated when literal compression is disabled"
4486 );
4487 assert_eq!(
4488 stats.lit_length_sum, 1,
4489 "literal-length stats still update for sequence modeling"
4490 );
4491 assert_eq!(
4492 stats.match_length_sum, 1,
4493 "match-length stats still update for sequence modeling"
4494 );
4495 assert_eq!(
4496 stats.off_code_sum, 1,
4497 "offset-code stats still update for sequence modeling"
4498 );
4499}
4500
4501#[test]
4502#[allow(clippy::borrow_deref_ref)]
4503fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
4504 let mut stats = HcOptState::new();
4505 stats.set_literals_compressed_for_tests(false);
4506 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
4507 b"aaaaabbbbcccddeeff00112233445566778899",
4508 );
4509 let ll = crate::fse::fse_encoder::default_ll_table();
4510 let ml = crate::fse::fse_encoder::default_ml_table();
4511 let of = crate::fse::fse_encoder::default_of_table();
4512 stats.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
4513 stats.rescale_freqs(
4514 b"abcd",
4515 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4516 );
4517 assert_eq!(
4518 stats.lit_sum, 0,
4519 "literal sum must stay zero when literals are uncompressed"
4520 );
4521 assert_eq!(
4522 stats.lit_freq.iter().copied().sum::<u32>(),
4523 0,
4524 "literal frequencies must ignore dictionary huffman seed when uncompressed"
4525 );
4526}
4527
4528#[test]
4529fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
4530 let mut hc = HcMatchGenerator::new(64);
4531 hc.table.history = b"xxxxxxABCDEFABCDEF".to_vec();
4532 hc.table.history_start = 0;
4533 hc.table.history_abs_start = 0;
4534
4535 let abs_pos = 12usize; let current_abs_end = hc.table.history.len();
4537 let reps = [6u32, 3u32, 9u32];
4538
4539 let mut lit_pos_candidates = Vec::new();
4540 hc.hc.for_each_repcode_candidate_with_reps(
4541 &hc.table,
4542 abs_pos,
4543 1,
4544 reps,
4545 current_abs_end,
4546 HC_OPT_MIN_MATCH_LEN,
4547 |c| {
4548 lit_pos_candidates.push(c.offset);
4549 },
4550 );
4551 assert!(
4552 lit_pos_candidates.contains(&6),
4553 "when lit_len>0, rep0 should be considered and match"
4554 );
4555
4556 let mut ll0_candidates = Vec::new();
4557 hc.hc.for_each_repcode_candidate_with_reps(
4558 &hc.table,
4559 abs_pos,
4560 0,
4561 reps,
4562 current_abs_end,
4563 HC_OPT_MIN_MATCH_LEN,
4564 |c| {
4565 ll0_candidates.push(c.offset);
4566 },
4567 );
4568 assert!(
4569 !ll0_candidates.contains(&6),
4570 "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
4571 );
4572}
4573
4574#[test]
4575fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
4576 let mut hc = HcMatchGenerator::new(64);
4577 hc.hc.search_depth = 0;
4578 hc.table.history = b"xyzxyzxyzxyz".to_vec();
4579 hc.table.history_start = 0;
4580 hc.table.history_abs_start = 0;
4581
4582 let abs_pos = 6usize;
4583 let current_abs_end = hc.table.history.len();
4584 let profile = HcOptimalCostProfile {
4585 max_chain_depth: 0,
4586 sufficient_match_len: usize::MAX / 2,
4587 accurate: false,
4588 favor_small_offsets: false,
4589 };
4590 let mut out = Vec::new();
4591 hc.collect_optimal_candidates(
4592 abs_pos,
4593 current_abs_end,
4594 profile,
4595 HcCandidateQuery {
4596 reps: [3, 6, 9],
4597 lit_len: 1,
4598 ldm_candidate: None,
4599 },
4600 &mut out,
4601 );
4602 assert!(
4603 !out.is_empty(),
4604 "rep candidates should remain available even when chain depth is zero"
4605 );
4606 assert!(
4607 out.iter().any(|c| c.offset == 3),
4608 "rep0 candidate should be retained"
4609 );
4610}
4611
4612#[test]
4613fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
4614 let mut hc = HcMatchGenerator::new(64);
4615 hc.table.history = b"aaaaaaaaaa".to_vec();
4616 hc.table.history_start = 0;
4617 hc.table.history_abs_start = 0;
4618 hc.table.position_base = 0;
4619 hc.hc.search_depth = 32;
4620 let abs_pos = 6usize;
4621 hc.table.ensure_tables();
4622 hc.table.insert_positions(0, abs_pos);
4623
4624 let profile = HcOptimalCostProfile {
4625 max_chain_depth: 32,
4626 sufficient_match_len: usize::MAX / 2,
4627 accurate: true,
4628 favor_small_offsets: false,
4629 };
4630 let mut out = Vec::new();
4631 hc.collect_optimal_candidates(
4632 abs_pos,
4633 hc.table.history.len(),
4634 profile,
4635 HcCandidateQuery {
4636 reps: [1, 4, 8],
4637 lit_len: 1,
4638 ldm_candidate: None,
4639 },
4640 &mut out,
4641 );
4642
4643 assert!(
4644 out.iter()
4645 .all(|candidate| matches!(candidate.offset, 1 | 4)),
4646 "terminal rep match should return before chain probing adds non-rep offsets"
4647 );
4648}
4649
4650#[test]
4651fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
4652 let mut hc = HcMatchGenerator::new(128);
4653 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
4654 hc.table.history_start = 0;
4655 hc.table.history_abs_start = 0;
4656 hc.table.position_base = 0;
4657 hc.hc.search_depth = 32;
4658 let abs_pos = 9usize;
4659 hc.table.ensure_tables();
4660 hc.table.insert_positions(0, abs_pos);
4661 hc.table.skip_insert_until_abs = 0;
4662
4663 let profile = HcOptimalCostProfile {
4664 max_chain_depth: 32,
4665 sufficient_match_len: usize::MAX / 2,
4666 accurate: true,
4667 favor_small_offsets: false,
4668 };
4669 let mut out = Vec::new();
4670 hc.collect_optimal_candidates(
4671 abs_pos,
4672 hc.table.history.len(),
4673 profile,
4674 HcCandidateQuery {
4675 reps: [1, 4, 8],
4676 lit_len: 1,
4677 ldm_candidate: None,
4678 },
4679 &mut out,
4680 );
4681
4682 assert!(
4683 hc.table.skip_insert_until_abs > abs_pos,
4684 "long chain match should advance skip window to avoid redundant immediate insertions"
4685 );
4686}
4687
4688#[test]
4689fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
4690 let mut hc = HcMatchGenerator::new(128);
4691 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
4692 hc.table.history_start = 0;
4693 hc.table.history_abs_start = 0;
4694 hc.table.position_base = 0;
4695 hc.hc.search_depth = 32;
4696 let abs_pos = 9usize;
4697 hc.table.ensure_tables();
4698 hc.table.insert_positions(0, abs_pos);
4699 hc.table.skip_insert_until_abs = 0;
4700
4701 let profile = HcOptimalCostProfile {
4702 max_chain_depth: 32,
4703 sufficient_match_len: 10,
4704 accurate: true,
4705 favor_small_offsets: false,
4706 };
4707 let mut out = Vec::new();
4708 hc.collect_optimal_candidates(
4709 abs_pos,
4710 hc.table.history.len(),
4711 profile,
4712 HcCandidateQuery {
4713 reps: [1, 4, 8],
4714 lit_len: 1,
4715 ldm_candidate: None,
4716 },
4717 &mut out,
4718 );
4719
4720 let best_match_end = out
4721 .iter()
4722 .map(|candidate| candidate.start.saturating_add(candidate.match_len))
4723 .max()
4724 .expect("expected at least one candidate");
4725 assert!(
4726 hc.table.skip_insert_until_abs > abs_pos,
4727 "chain fast-skip must advance past current position"
4728 );
4729 assert!(
4730 hc.table.skip_insert_until_abs <= best_match_end.saturating_sub(8),
4731 "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
4732 );
4733}
4734
4735#[test]
4736fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
4737 let mut hc = HcMatchGenerator::new(256);
4738 hc.table.history = b"abcdefghijklmnop".to_vec();
4739 hc.table.history_start = 0;
4740 hc.table.history_abs_start = 0;
4741 hc.table.position_base = 0;
4742 hc.hc.search_depth = 0;
4743 hc.table.ensure_tables();
4744
4745 let abs_pos = 8usize;
4746 hc.table.skip_insert_until_abs = 0;
4747
4748 let profile = HcOptimalCostProfile {
4749 max_chain_depth: 0,
4750 sufficient_match_len: usize::MAX / 2,
4751 accurate: true,
4752 favor_small_offsets: false,
4753 };
4754 let mut out = Vec::new();
4755 hc.collect_optimal_candidates(
4756 abs_pos,
4757 hc.table.history.len(),
4758 profile,
4759 HcCandidateQuery {
4760 reps: [1, 4, 8],
4761 lit_len: 1,
4762 ldm_candidate: None,
4763 },
4764 &mut out,
4765 );
4766
4767 assert_eq!(
4768 hc.table.skip_insert_until_abs,
4769 abs_pos.saturating_add(1),
4770 "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
4771 );
4772}
4773
4774#[test]
4787fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
4788 let mut hc = HcMatchGenerator::new(512);
4789 hc.table.history = (0..256).map(|i| (i % 251) as u8).collect();
4790 hc.table.history_start = 0;
4791 hc.table.history_abs_start = 0;
4792
4793 let abs_pos = 128usize;
4794 let current_abs_end = 256usize;
4795 let ldm = MatchCandidate {
4796 start: abs_pos,
4797 offset: 96,
4798 match_len: 40,
4799 };
4800
4801 let profile = HcOptimalCostProfile {
4802 max_chain_depth: 0,
4803 sufficient_match_len: usize::MAX / 2,
4804 accurate: true,
4805 favor_small_offsets: false,
4806 };
4807 let mut out = Vec::new();
4808 hc.collect_optimal_candidates(
4809 abs_pos,
4810 current_abs_end,
4811 profile,
4812 HcCandidateQuery {
4813 reps: [1, 4, 8],
4814 lit_len: 1,
4815 ldm_candidate: Some(ldm),
4816 },
4817 &mut out,
4818 );
4819 assert!(
4820 out.iter().any(
4821 |candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len
4822 ),
4823 "LDM candidate should be present in optimal candidate set"
4824 );
4825}
4826
4827#[test]
4828fn btultra_and_btultra2_both_keep_dictionary_candidates() {
4829 use super::strategy::StrategyTag;
4837
4838 let test_config = HcConfig {
4839 hash_log: 23,
4840 chain_log: 22,
4841 search_depth: 32,
4842 target_len: 256,
4843 };
4844 let window_log = 20u8;
4845
4846 let prepare_history = |hc: &mut HcMatchGenerator, abs_pos: usize| {
4847 hc.table.history = alloc::vec![0u8; 160];
4848 for i in 0..64 {
4849 hc.table.history[i] = b'a' + (i % 7) as u8;
4850 }
4851 for i in 64..160 {
4852 hc.table.history[i] = b'k' + (i % 5) as u8;
4853 }
4854 for i in 0..24 {
4855 hc.table.history[abs_pos + i] = hc.table.history[16 + i];
4856 }
4857 hc.table.history_start = 0;
4858 hc.table.history_abs_start = 0;
4859 hc.table.position_base = 0;
4860 hc.table.ensure_tables();
4861 hc.table.insert_positions(0, abs_pos);
4862 hc.table.dictionary_limit_abs = Some(64);
4863 hc.table.skip_insert_until_abs = 0;
4864 };
4865
4866 let profile = HcOptimalCostProfile {
4867 max_chain_depth: 32,
4868 sufficient_match_len: usize::MAX / 2,
4869 accurate: true,
4870 favor_small_offsets: false,
4871 };
4872 let abs_pos = 96usize;
4873 let mut out = Vec::new();
4874
4875 let mut hc = HcMatchGenerator::new(256);
4876 hc.configure(test_config, StrategyTag::BtUltra2, window_log);
4877 prepare_history(&mut hc, abs_pos);
4878 hc.collect_optimal_candidates(
4879 abs_pos,
4880 160,
4881 profile,
4882 HcCandidateQuery {
4883 reps: [1, 4, 8],
4884 lit_len: 1,
4885 ldm_candidate: None,
4886 },
4887 &mut out,
4888 );
4889 assert!(
4890 out.iter().any(|candidate| candidate.offset >= 32),
4891 "btultra2 should retain dictionary candidates on donor-parity path"
4892 );
4893
4894 let mut hc = HcMatchGenerator::new(256);
4895 hc.configure(test_config, StrategyTag::BtUltra, window_log);
4896 prepare_history(&mut hc, abs_pos);
4897 hc.collect_optimal_candidates(
4898 abs_pos,
4899 160,
4900 profile,
4901 HcCandidateQuery {
4902 reps: [1, 4, 8],
4903 lit_len: 1,
4904 ldm_candidate: None,
4905 },
4906 &mut out,
4907 );
4908 assert!(
4909 out.iter().any(|candidate| candidate.offset >= 32),
4910 "btultra should retain dictionary candidates"
4911 );
4912}
4913
4914#[test]
4915fn driver_small_source_hint_shrinks_dfast_hash_tables() {
4916 let mut driver = MatchGeneratorDriver::new(32, 2);
4917
4918 driver.reset(CompressionLevel::Level(2));
4919 let mut space = driver.get_next_space();
4920 space[..12].copy_from_slice(b"abcabcabcabc");
4921 space.truncate(12);
4922 driver.commit_space(space);
4923 driver.skip_matching_with_hint(None);
4924 let full_long = driver.dfast_matcher().long_hash.len();
4927 let full_short = driver.dfast_matcher().short_hash.len();
4928 assert_eq!(full_long, 1 << DFAST_HASH_BITS);
4929 assert_eq!(
4930 full_short,
4931 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA)
4932 );
4933
4934 driver.set_source_size_hint(1024);
4935 driver.reset(CompressionLevel::Level(2));
4936 let mut space = driver.get_next_space();
4937 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
4938 space.truncate(12);
4939 driver.commit_space(space);
4940 driver.skip_matching_with_hint(None);
4941 let hinted_long = driver.dfast_matcher().long_hash.len();
4942 let hinted_short = driver.dfast_matcher().short_hash.len();
4943
4944 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
4945 assert_eq!(hinted_long, 1 << MIN_HINTED_WINDOW_LOG);
4952 let expected_hinted_short_bits = (MIN_HINTED_WINDOW_LOG as usize)
4953 .saturating_sub(DFAST_SHORT_HASH_BITS_DELTA)
4954 .max(MIN_WINDOW_LOG as usize);
4955 assert_eq!(
4956 hinted_short,
4957 1 << expected_hinted_short_bits,
4958 "short table must sit one DFAST_SHORT_HASH_BITS_DELTA below the long table \
4959 (clamped at MIN_WINDOW_LOG) — a regression that pulls it up to the long-table \
4960 floor would still satisfy the `< full_short` bound below and slip through"
4961 );
4962 assert!(
4963 hinted_long < full_long && hinted_short < full_short,
4964 "tiny source hint should reduce both dfast tables"
4965 );
4966}
4967
4968#[test]
4969fn driver_small_source_hint_shrinks_row_hash_tables() {
4970 let mut driver = MatchGeneratorDriver::new(32, 2);
4971
4972 driver.reset(CompressionLevel::Level(4));
4973 let mut space = driver.get_next_space();
4974 space[..12].copy_from_slice(b"abcabcabcabc");
4975 space.truncate(12);
4976 driver.commit_space(space);
4977 driver.skip_matching_with_hint(None);
4978 let full_rows = driver.row_matcher().row_heads.len();
4979 assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));
4980
4981 driver.set_source_size_hint(1024);
4982 driver.reset(CompressionLevel::Level(4));
4983 let mut space = driver.get_next_space();
4984 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
4985 space.truncate(12);
4986 driver.commit_space(space);
4987 driver.skip_matching_with_hint(None);
4988 let hinted_rows = driver.row_matcher().row_heads.len();
4989
4990 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
4991 assert_eq!(
4992 hinted_rows,
4993 1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
4994 );
4995 assert!(
4996 hinted_rows < full_rows,
4997 "tiny source hint should reduce row hash table footprint"
4998 );
4999}
5000
5001#[test]
5002fn row_matches_roundtrip_multi_block_pattern() {
5003 let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
5004 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5005 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5006
5007 let mut matcher = RowMatchGenerator::new(1 << 22);
5008 matcher.configure(ROW_CONFIG);
5009 matcher.ensure_tables();
5010 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5011 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5012 Sequence::Triple {
5013 literals,
5014 offset,
5015 match_len,
5016 } => {
5017 decoded.extend_from_slice(literals);
5018 let start = decoded.len() - offset;
5019 for i in 0..match_len {
5020 let byte = decoded[start + i];
5021 decoded.push(byte);
5022 }
5023 }
5024 };
5025
5026 matcher.add_data(first_block.clone(), |_| {});
5027 let mut history = Vec::new();
5028 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5029 assert_eq!(history, first_block);
5030
5031 matcher.add_data(second_block.clone(), |_| {});
5032 let prefix_len = history.len();
5033 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5034
5035 assert_eq!(&history[prefix_len..], second_block.as_slice());
5036
5037 let third_block: Vec<u8> = (0u8..=255).collect();
5039 matcher.add_data(third_block.clone(), |_| {});
5040 let third_prefix = history.len();
5041 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5042 assert_eq!(&history[third_prefix..], third_block.as_slice());
5043}
5044
5045#[test]
5046fn row_short_block_emits_literals_only() {
5047 let mut matcher = RowMatchGenerator::new(1 << 22);
5048 matcher.configure(ROW_CONFIG);
5049
5050 matcher.add_data(b"abcde".to_vec(), |_| {});
5051
5052 let mut saw_triple = false;
5053 let mut reconstructed = Vec::new();
5054 matcher.start_matching(|seq| match seq {
5055 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
5056 Sequence::Triple { .. } => saw_triple = true,
5057 });
5058
5059 assert!(
5060 !saw_triple,
5061 "row backend must not emit triples for short blocks"
5062 );
5063 assert_eq!(reconstructed, b"abcde");
5064
5065 saw_triple = false;
5067 matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
5068 matcher.start_matching(|seq| {
5069 if let Sequence::Triple { .. } = seq {
5070 saw_triple = true;
5071 }
5072 });
5073 assert!(
5074 saw_triple,
5075 "row backend should emit triples on repeated data"
5076 );
5077}
5078
5079#[test]
5080fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
5081 let mut matcher = RowMatchGenerator::new(1 << 22);
5082 matcher.configure(ROW_CONFIG);
5083 matcher.add_data(b"abcabc".to_vec(), |_| {});
5084
5085 let best = MatchCandidate {
5086 start: 0,
5087 offset: 1,
5088 match_len: ROW_MIN_MATCH_LEN,
5089 };
5090 let picked = matcher
5091 .pick_lazy_match(0, 0, Some(best))
5092 .expect("best candidate must survive");
5093
5094 assert_eq!(picked.start, best.start);
5095 assert_eq!(picked.offset, best.offset);
5096 assert_eq!(picked.match_len, best.match_len);
5097}
5098
5099#[test]
5100fn row_backfills_previous_block_tail_for_cross_boundary_match() {
5101 let mut matcher = RowMatchGenerator::new(1 << 22);
5102 matcher.configure(ROW_CONFIG);
5103
5104 let mut first_block = alloc::vec![0xA5; 64];
5105 first_block.extend_from_slice(b"XYZ");
5106 let second_block = b"XYZXYZtail".to_vec();
5107
5108 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5109 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5110 Sequence::Triple {
5111 literals,
5112 offset,
5113 match_len,
5114 } => {
5115 decoded.extend_from_slice(literals);
5116 let start = decoded.len() - offset;
5117 for i in 0..match_len {
5118 let byte = decoded[start + i];
5119 decoded.push(byte);
5120 }
5121 }
5122 };
5123
5124 matcher.add_data(first_block.clone(), |_| {});
5125 let mut reconstructed = Vec::new();
5126 matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
5127 assert_eq!(reconstructed, first_block);
5128
5129 matcher.add_data(second_block.clone(), |_| {});
5130 let mut saw_cross_boundary = false;
5131 let prefix_len = reconstructed.len();
5132 matcher.start_matching(|seq| {
5133 if let Sequence::Triple {
5134 literals,
5135 offset,
5136 match_len,
5137 } = seq
5138 && literals.is_empty()
5139 && offset == 3
5140 && match_len >= ROW_MIN_MATCH_LEN
5141 {
5142 saw_cross_boundary = true;
5143 }
5144 replay_sequence(&mut reconstructed, seq);
5145 });
5146
5147 assert!(
5148 saw_cross_boundary,
5149 "row matcher should reuse the 3-byte previous-block tail"
5150 );
5151 assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
5152}
5153
5154#[test]
5155fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
5156 let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);
5157
5158 let mut dense = RowMatchGenerator::new(1 << 22);
5159 dense.configure(ROW_CONFIG);
5160 dense.add_data(data.clone(), |_| {});
5161 dense.skip_matching_with_hint(Some(false));
5162 let dense_slots = dense
5163 .row_positions
5164 .iter()
5165 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5166 .count();
5167
5168 let mut sparse = RowMatchGenerator::new(1 << 22);
5169 sparse.configure(ROW_CONFIG);
5170 sparse.add_data(data, |_| {});
5171 sparse.skip_matching_with_hint(Some(true));
5172 let sparse_slots = sparse
5173 .row_positions
5174 .iter()
5175 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5176 .count();
5177
5178 assert!(
5179 sparse_slots < dense_slots,
5180 "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
5181 );
5182}
5183
5184#[test]
5185fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
5186 let mut driver = MatchGeneratorDriver::new(32, 2);
5187
5188 driver.reset(CompressionLevel::Level(2));
5189 let mut space = driver.get_next_space();
5190 space[..12].copy_from_slice(b"abcabcabcabc");
5191 space.truncate(12);
5192 driver.commit_space(space);
5193 driver.skip_matching_with_hint(None);
5194
5195 let long_len = driver.dfast_matcher().long_hash.len();
5199 let short_len = driver.dfast_matcher().short_hash.len();
5200 assert_eq!(
5201 long_len,
5202 1 << DFAST_HASH_BITS,
5203 "unhinted Level(2) should keep default long-hash table size"
5204 );
5205 assert_eq!(
5206 short_len,
5207 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA),
5208 "unhinted Level(2) short-hash should be one bit smaller than long-hash"
5209 );
5210}
5211
5212#[test]
5213fn simple_backend_rejects_undersized_pooled_suffix_store() {
5214 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5215 driver.reset(CompressionLevel::Fastest);
5216
5217 driver.suffix_pool.push(SuffixStore::with_capacity(1024));
5218
5219 let mut space = driver.get_next_space();
5220 space.clear();
5221 space.resize(4096, 0xAB);
5222 driver.commit_space(space);
5223
5224 let last_suffix_slots = driver
5225 .simple()
5226 .window
5227 .last()
5228 .expect("window entry must exist after commit")
5229 .suffixes
5230 .slots
5231 .len();
5232 assert!(
5233 last_suffix_slots >= 4096,
5234 "undersized pooled suffix store must not be reused for larger blocks"
5235 );
5236}
5237
5238#[test]
5239fn source_hint_clamps_driver_slice_size_to_window() {
5240 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5241 driver.set_source_size_hint(1024);
5242 driver.reset(CompressionLevel::Default);
5243
5244 let window = driver.window_size() as usize;
5245 assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
5246 assert_eq!(driver.slice_size, window);
5247
5248 let space = driver.get_next_space();
5249 assert_eq!(space.len(), window);
5250 driver.commit_space(space);
5251}
5252
5253#[test]
5254fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
5255 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5256 driver.reset(CompressionLevel::Default);
5257
5258 let large = driver.get_next_space();
5259 let large_capacity = large.capacity();
5260 assert!(large_capacity >= 128 * 1024);
5261 driver.commit_space(large);
5262
5263 driver.set_source_size_hint(1024);
5264 driver.reset(CompressionLevel::Default);
5265
5266 let small = driver.get_next_space();
5267 assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
5268 assert!(
5269 small.capacity() >= large_capacity,
5270 "pooled buffer capacity should be preserved to avoid shrink/grow churn"
5271 );
5272}
5273
5274#[test]
5275fn driver_best_to_fastest_releases_oversized_hc_tables() {
5276 let mut driver = MatchGeneratorDriver::new(32, 2);
5277
5278 driver.reset(CompressionLevel::Best);
5280 assert_eq!(driver.window_size(), (1u64 << 24));
5281
5282 let mut space = driver.get_next_space();
5284 space[..12].copy_from_slice(b"abcabcabcabc");
5285 space.truncate(12);
5286 driver.commit_space(space);
5287 driver.skip_matching_with_hint(None);
5288
5289 driver.reset(CompressionLevel::Fastest);
5304 assert_eq!(driver.window_size(), (1u64 << 17));
5305 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
5306}
5307
5308#[test]
5309fn driver_better_to_best_resizes_hc_tables() {
5310 let mut driver = MatchGeneratorDriver::new(32, 2);
5311
5312 driver.reset(CompressionLevel::Better);
5314 assert_eq!(driver.window_size(), (1u64 << 23));
5315
5316 let mut space = driver.get_next_space();
5317 space[..12].copy_from_slice(b"abcabcabcabc");
5318 space.truncate(12);
5319 driver.commit_space(space);
5320 driver.skip_matching_with_hint(None);
5321
5322 let hc = driver.hc_matcher();
5323 let better_hash_len = hc.table.hash_table.len();
5324 let better_chain_len = hc.table.chain_table.len();
5325
5326 driver.reset(CompressionLevel::Best);
5328 assert_eq!(driver.window_size(), (1u64 << 24));
5329
5330 let mut space = driver.get_next_space();
5332 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5333 space.truncate(12);
5334 driver.commit_space(space);
5335 driver.skip_matching_with_hint(None);
5336
5337 let hc = driver.hc_matcher();
5338 assert!(
5339 hc.table.hash_table.len() > better_hash_len,
5340 "Best hash_table ({}) should be larger than Better ({})",
5341 hc.table.hash_table.len(),
5342 better_hash_len
5343 );
5344 assert!(
5345 hc.table.chain_table.len() > better_chain_len,
5346 "Best chain_table ({}) should be larger than Better ({})",
5347 hc.table.chain_table.len(),
5348 better_chain_len
5349 );
5350}
5351
5352#[test]
5353fn prime_with_dictionary_preserves_history_for_first_full_block() {
5354 let mut driver = MatchGeneratorDriver::new(8, 1);
5355 driver.reset(CompressionLevel::Fastest);
5356
5357 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5358
5359 let mut space = driver.get_next_space();
5360 space.clear();
5361 space.extend_from_slice(b"abcdefgh");
5362 driver.commit_space(space);
5363
5364 let mut saw_match = false;
5365 driver.start_matching(|seq| {
5366 if let Sequence::Triple {
5367 literals,
5368 offset,
5369 match_len,
5370 } = seq
5371 && literals.is_empty()
5372 && offset == 8
5373 && match_len >= MIN_MATCH_LEN
5374 {
5375 saw_match = true;
5376 }
5377 });
5378
5379 assert!(
5380 saw_match,
5381 "first full block should still match dictionary-primed history"
5382 );
5383}
5384
5385#[test]
5386fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
5387 let mut driver = MatchGeneratorDriver::new(8, 1);
5388 driver.reset(CompressionLevel::Fastest);
5389
5390 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
5391
5392 let mut space = driver.get_next_space();
5393 space.clear();
5394 space.extend_from_slice(b"abcdefgh");
5395 driver.commit_space(space);
5396
5397 let mut saw_match = false;
5398 driver.start_matching(|seq| {
5399 if let Sequence::Triple {
5400 literals,
5401 offset,
5402 match_len,
5403 } = seq
5404 && literals.is_empty()
5405 && offset == 24
5406 && match_len >= MIN_MATCH_LEN
5407 {
5408 saw_match = true;
5409 }
5410 });
5411
5412 assert!(
5413 saw_match,
5414 "dictionary bytes should remain addressable until frame output exceeds the live window"
5415 );
5416}
5417
5418#[test]
5419fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
5420 let mut driver = MatchGeneratorDriver::new(8, 1);
5421 driver.reset(CompressionLevel::Fastest);
5422
5423 driver.prime_with_dictionary(&[], [11, 7, 3]);
5424
5425 assert_eq!(driver.simple_mut().offset_hist, [11, 7, 3]);
5426}
5427
5428#[test]
5429fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
5430 let mut driver = MatchGeneratorDriver::new(8, 1);
5431 driver.reset(CompressionLevel::Better);
5432
5433 driver.prime_with_dictionary(&[], [11, 7, 3]);
5434
5435 assert_eq!(driver.hc_matcher().table.offset_hist, [11, 7, 3]);
5436 assert!(
5437 !driver
5438 .hc_matcher()
5439 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
5440 "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
5441 );
5442}
5443
5444#[test]
5445fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
5446 let mut driver = MatchGeneratorDriver::new(8, 1);
5447 driver.reset(CompressionLevel::Better);
5448
5449 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5450
5451 assert!(
5452 !driver
5453 .hc_matcher()
5454 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
5455 "btultra2 warmup must stay disabled after dictionary priming with content"
5456 );
5457}
5458
5459#[test]
5460fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
5461 let mut driver = MatchGeneratorDriver::new(8, 1);
5462 driver.reset(CompressionLevel::Level(2));
5463
5464 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5465
5466 let mut space = driver.get_next_space();
5467 space.clear();
5468 space.extend_from_slice(b"abcdefgh");
5469 driver.commit_space(space);
5470
5471 let mut saw_match = false;
5472 driver.start_matching(|seq| {
5473 if let Sequence::Triple {
5474 literals,
5475 offset,
5476 match_len,
5477 } = seq
5478 && literals.is_empty()
5479 && offset == 8
5480 && match_len >= DFAST_MIN_MATCH_LEN
5481 {
5482 saw_match = true;
5483 }
5484 });
5485
5486 assert!(
5487 saw_match,
5488 "dfast backend should match dictionary-primed history in first full block"
5489 );
5490}
5491
5492#[test]
5493fn prime_with_dictionary_does_not_inflate_reported_window_size() {
5494 let mut driver = MatchGeneratorDriver::new(8, 1);
5495 driver.reset(CompressionLevel::Fastest);
5496
5497 let before = driver.window_size();
5498 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
5499 let after = driver.window_size();
5500
5501 assert_eq!(
5502 after, before,
5503 "dictionary retention budget must not change reported frame window size"
5504 );
5505}
5506
5507#[test]
5508fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
5509 let mut driver = MatchGeneratorDriver::new(8, 2);
5510 driver.reset(CompressionLevel::Fastest);
5511
5512 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
5515
5516 assert!(
5517 driver
5518 .simple()
5519 .window
5520 .iter()
5521 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
5522 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
5523 );
5524}
5525
5526#[test]
5527fn prime_with_dictionary_counts_only_committed_tail_budget() {
5528 let mut driver = MatchGeneratorDriver::new(8, 1);
5529 driver.reset(CompressionLevel::Fastest);
5530
5531 let before = driver.simple_mut().max_window_size;
5532 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
5534
5535 assert_eq!(
5536 driver.simple_mut().max_window_size,
5537 before + 8,
5538 "retention budget must account only for dictionary bytes actually committed to history"
5539 );
5540}
5541
5542#[test]
5543fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
5544 let mut driver = MatchGeneratorDriver::new(8, 1);
5545 driver.reset(CompressionLevel::Level(2));
5546
5547 let before = driver.dfast_matcher().max_window_size;
5548 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
5551
5552 assert_eq!(
5553 driver.dfast_matcher().max_window_size,
5554 before + 12,
5555 "dfast retention budget should include 4-byte dictionary tails"
5556 );
5557}
5558
5559#[test]
5560fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
5561 let mut driver = MatchGeneratorDriver::new(8, 1);
5562 driver.reset(CompressionLevel::Level(4));
5563
5564 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5565
5566 let mut space = driver.get_next_space();
5567 space.clear();
5568 space.extend_from_slice(b"abcdefgh");
5569 driver.commit_space(space);
5570
5571 let mut saw_match = false;
5572 driver.start_matching(|seq| {
5573 if let Sequence::Triple {
5574 literals,
5575 offset,
5576 match_len,
5577 } = seq
5578 && literals.is_empty()
5579 && offset == 8
5580 && match_len >= ROW_MIN_MATCH_LEN
5581 {
5582 saw_match = true;
5583 }
5584 });
5585
5586 assert!(
5587 saw_match,
5588 "row backend should match dictionary-primed history in first full block"
5589 );
5590}
5591
5592#[test]
5593fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
5594 let mut driver = MatchGeneratorDriver::new(8, 1);
5595 driver.reset(CompressionLevel::Level(4));
5596
5597 let base_window = driver.row_matcher().max_window_size;
5598 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
5601
5602 assert_eq!(
5603 driver.row_matcher().max_window_size,
5604 base_window + 8,
5605 "row retained window must exclude uncommitted 1-byte tail"
5606 );
5607}
5608
5609#[test]
5610fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
5611 let mut driver = MatchGeneratorDriver::new(8, 1);
5612 driver.reset(CompressionLevel::Level(4));
5613 driver.row_matcher_mut().max_window_size = 8;
5615 driver.reported_window_size = 8;
5616
5617 let base_window = driver.row_matcher().max_window_size;
5618 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
5619 assert_eq!(driver.row_matcher().max_window_size, base_window + 24);
5620
5621 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
5622 let mut space = driver.get_next_space();
5623 space.clear();
5624 space.extend_from_slice(block);
5625 driver.commit_space(space);
5626 driver.skip_matching_with_hint(None);
5627 }
5628
5629 assert_eq!(
5630 driver.dictionary_retained_budget, 0,
5631 "dictionary budget should be fully retired once primed dict slices are evicted"
5632 );
5633 assert_eq!(
5634 driver.row_matcher().max_window_size,
5635 base_window,
5636 "retired dictionary budget must not remain reusable for live history"
5637 );
5638}
5639
5640#[test]
5650fn row_get_last_space_then_reset_to_fastest_drops_row_variant() {
5651 let mut driver = MatchGeneratorDriver::new(8, 1);
5652 driver.reset(CompressionLevel::Level(4));
5653 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
5654
5655 let mut space = driver.get_next_space();
5656 space.clear();
5657 space.extend_from_slice(b"row-data");
5658 driver.commit_space(space);
5659
5660 assert_eq!(driver.get_last_space(), b"row-data");
5661
5662 driver.reset(CompressionLevel::Fastest);
5663 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
5664}
5665
5666#[test]
5677fn driver_reset_from_row_backend_recycles_row_buffers_into_pool() {
5678 let mut driver = MatchGeneratorDriver::new(8, 1);
5679 driver.reset(CompressionLevel::Level(4));
5680 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
5681
5682 let mut space = driver.get_next_space();
5683 space.extend_from_slice(b"row-data-to-recycle");
5684 driver.commit_space(space);
5685
5686 let before_pool = driver.vec_pool.len();
5687 driver.reset(CompressionLevel::Fastest);
5688
5689 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
5690 assert!(
5696 driver.vec_pool.len() > before_pool,
5697 "row reset must recycle the committed row history buffer into vec_pool \
5698 (before_pool = {before_pool}, after = {})",
5699 driver.vec_pool.len()
5700 );
5701}
5702
5703#[test]
5704fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
5705 let mut params = resolve_level_params(CompressionLevel::Level(4), None);
5706 params.window_log = 22;
5707 let adjusted = adjust_params_for_source_size(params, 0);
5708 assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
5709}
5710
5711#[test]
5712fn common_prefix_len_matches_scalar_reference_across_offsets() {
5713 fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
5714 a.iter()
5715 .zip(b.iter())
5716 .take_while(|(lhs, rhs)| lhs == rhs)
5717 .count()
5718 }
5719
5720 for total_len in [
5721 0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
5722 ] {
5723 let base: Vec<u8> = (0..total_len)
5724 .map(|i| ((i * 13 + 7) & 0xFF) as u8)
5725 .collect();
5726
5727 for start in [0usize, 1, 3] {
5728 if start > total_len {
5729 continue;
5730 }
5731 let a = &base[start..];
5732 let b = a.to_vec();
5733 assert_eq!(
5734 common_prefix_len(a, &b),
5735 scalar_reference(a, &b),
5736 "equal slices total_len={total_len} start={start}"
5737 );
5738
5739 let len = a.len();
5740 for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
5741 if mismatch >= len {
5742 continue;
5743 }
5744 let mut altered = b.clone();
5745 altered[mismatch] ^= 0x5A;
5746 assert_eq!(
5747 common_prefix_len(a, &altered),
5748 scalar_reference(a, &altered),
5749 "total_len={total_len} start={start} mismatch={mismatch}"
5750 );
5751 }
5752
5753 if len > 0 {
5754 let mismatch = len - 1;
5755 let mut altered = b.clone();
5756 altered[mismatch] ^= 0xA5;
5757 assert_eq!(
5758 common_prefix_len(a, &altered),
5759 scalar_reference(a, &altered),
5760 "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
5761 );
5762 }
5763 }
5764 }
5765
5766 let long = alloc::vec![0xAB; 320];
5767 let shorter = alloc::vec![0xAB; 137];
5768 assert_eq!(
5769 common_prefix_len(&long, &shorter),
5770 scalar_reference(&long, &shorter)
5771 );
5772}
5773
5774#[test]
5775fn row_pick_lazy_returns_none_when_next_is_better() {
5776 let mut matcher = RowMatchGenerator::new(1 << 22);
5777 matcher.configure(ROW_CONFIG);
5778 matcher.add_data(alloc::vec![b'a'; 64], |_| {});
5779 matcher.ensure_tables();
5780
5781 let abs_pos = matcher.history_abs_start + 16;
5782 let best = MatchCandidate {
5783 start: abs_pos,
5784 offset: 8,
5785 match_len: ROW_MIN_MATCH_LEN,
5786 };
5787 assert!(
5788 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
5789 "lazy picker should defer when next position is clearly better"
5790 );
5791}
5792
5793#[test]
5794fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
5795 let mut matcher = RowMatchGenerator::new(1 << 22);
5796 matcher.configure(ROW_CONFIG);
5797 matcher.lazy_depth = 2;
5798 matcher.search_depth = 0;
5799 matcher.offset_hist = [6, 9, 1];
5800
5801 let mut data = alloc::vec![b'x'; 40];
5802 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
5803 matcher.add_data(data, |_| {});
5804 matcher.ensure_tables();
5805
5806 let abs_pos = matcher.history_abs_start + 20;
5807 let best = matcher
5808 .best_match(abs_pos, 0)
5809 .expect("expected baseline repcode match");
5810 assert_eq!(best.offset, 9);
5811 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
5812
5813 if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
5814 assert!(next.match_len <= best.match_len);
5815 }
5816
5817 let next2 = matcher
5818 .best_match(abs_pos + 2, 2)
5819 .expect("expected +2 candidate");
5820 assert!(
5821 next2.match_len > best.match_len + 1,
5822 "+2 candidate must be significantly better for depth-2 lazy skip"
5823 );
5824 assert!(
5825 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
5826 "lazy picker should defer when +2 candidate is significantly better"
5827 );
5828}
5829
5830#[test]
5831fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
5832 let mut matcher = RowMatchGenerator::new(1 << 22);
5833 matcher.configure(ROW_CONFIG);
5834 matcher.lazy_depth = 2;
5835 matcher.search_depth = 0;
5836 matcher.offset_hist = [6, 9, 1];
5837
5838 let mut data = alloc::vec![b'x'; 40];
5839 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
5840 matcher.add_data(data, |_| {});
5841 matcher.ensure_tables();
5842
5843 let abs_pos = matcher.history_abs_start + 20;
5844 let best = matcher
5845 .best_match(abs_pos, 0)
5846 .expect("expected baseline repcode match");
5847 assert_eq!(best.offset, 9);
5848 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
5849
5850 let next2 = matcher
5851 .best_match(abs_pos + 2, 2)
5852 .expect("expected +2 candidate");
5853 assert_eq!(next2.match_len, best.match_len + 1);
5854 let chosen = matcher
5855 .pick_lazy_match(abs_pos, 0, Some(best))
5856 .expect("lazy picker should keep current best");
5857 assert_eq!(chosen.start, best.start);
5858 assert_eq!(chosen.offset, best.offset);
5859 assert_eq!(chosen.match_len, best.match_len);
5860}
5861
5862#[test]
5864fn row_hash_and_row_extracts_high_bits() {
5865 let mut matcher = RowMatchGenerator::new(1 << 22);
5866 matcher.configure(ROW_CONFIG);
5867 matcher.add_data(
5868 alloc::vec![
5869 0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
5870 0x70, 0x80,
5871 ],
5872 |_| {},
5873 );
5874 matcher.ensure_tables();
5875
5876 let pos = matcher.history_abs_start + 8;
5877 let (row, tag) = matcher
5878 .hash_and_row(pos)
5879 .expect("row hash should be available");
5880
5881 let idx = pos - matcher.history_abs_start;
5882 let concat = matcher.live_history();
5883 let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
5884 let hash = crate::encoding::fastpath::hash_mix_u64_with_kernel(matcher.hash_kernel, value);
5885 let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
5886 let combined = hash >> (u64::BITS as usize - total_bits);
5887 let expected_row =
5888 ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
5889 let expected_tag = combined as u8;
5890
5891 assert_eq!(row, expected_row);
5892 assert_eq!(tag, expected_tag);
5893}
5894
5895#[test]
5896fn row_repcode_skips_candidate_before_history_start() {
5897 let mut matcher = RowMatchGenerator::new(1 << 22);
5898 matcher.configure(ROW_CONFIG);
5899 matcher.history = alloc::vec![b'a'; 20];
5900 matcher.history_start = 0;
5901 matcher.history_abs_start = 10;
5902 matcher.offset_hist = [3, 0, 0];
5903
5904 assert!(matcher.repcode_candidate(12, 1).is_none());
5905}
5906
5907#[test]
5908fn row_repcode_returns_none_when_position_too_close_to_history_end() {
5909 let mut matcher = RowMatchGenerator::new(1 << 22);
5910 matcher.configure(ROW_CONFIG);
5911 matcher.history = b"abcde".to_vec();
5912 matcher.history_start = 0;
5913 matcher.history_abs_start = 0;
5914 matcher.offset_hist = [1, 0, 0];
5915
5916 assert!(matcher.repcode_candidate(4, 1).is_none());
5917}
5918
5919#[cfg(all(feature = "std", target_arch = "x86_64"))]
5920#[test]
5921fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
5922 use crate::encoding::fastpath::{self, FastpathKernel};
5923 if !is_x86_feature_detected!("sse4.2") {
5924 return;
5925 }
5926 let v = 0x0123_4567_89AB_CDEFu64;
5927 let accelerated = unsafe { fastpath::sse42::hash_mix_u64(v) };
5929 let dispatched = fastpath::dispatch_hash_mix_u64(v);
5931 let kernel = fastpath::select_kernel();
5932 if kernel == FastpathKernel::Sse42 {
5933 assert_eq!(dispatched, accelerated);
5934 } else {
5935 assert_eq!(dispatched, accelerated, "AVX2/SSE4.2 share CRC32 mix");
5937 }
5938}
5939
5940#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
5941#[test]
5942fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
5943 use crate::encoding::fastpath;
5944 if !is_aarch64_feature_detected!("crc") {
5945 return;
5946 }
5947 let v = 0x0123_4567_89AB_CDEFu64;
5948 let accelerated = unsafe { fastpath::neon::hash_mix_u64(v) };
5950 let dispatched = fastpath::dispatch_hash_mix_u64(v);
5951 assert_eq!(dispatched, accelerated);
5952}
5953
5954#[test]
5955fn hc_hash3_position_matches_donor_formula() {
5956 let bytes = [b'a', b'b', b'c', b'd'];
5957 let read32 = u32::from_le_bytes(bytes);
5958 let expected = (((read32 << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - HC3_HASH_LOG)) as usize;
5959 assert_eq!(
5960 super::match_table::storage::MatchTable::hash3_position(&bytes, HC3_HASH_LOG),
5961 expected
5962 );
5963}
5964
5965#[test]
5966fn hc_hash_position_matches_donor_hash4_formula() {
5967 let mut hc = HcMatchGenerator::new(1 << 20);
5968 hc.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);
5969 let bytes = [b'a', b'b', b'c', b'd'];
5970 let read32 = u32::from_le_bytes(bytes);
5971 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
5972 assert_eq!(hc.table.hash_position(&bytes), expected);
5973}
5974
5975#[test]
5976fn btultra2_main_hash_uses_donor_hash4_formula() {
5977 let mut hc = HcMatchGenerator::new(1 << 20);
5978 hc.configure(
5979 BTULTRA2_HC_CONFIG_L22,
5980 super::strategy::StrategyTag::BtUltra2,
5981 27,
5982 );
5983 let bytes = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'];
5984 let read32 = u32::from_le_bytes(bytes[..4].try_into().unwrap());
5985 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
5986 let actual = super::match_table::storage::MatchTable::hash_position_with_mls(
5987 &bytes,
5988 hc.table.hash_log,
5989 super::bt::BtMatcher::HASH_MLS,
5990 );
5991 assert_eq!(actual, expected);
5992}
5993
5994#[test]
5995fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
5996 let mut matcher = RowMatchGenerator::new(1 << 22);
5997 matcher.configure(ROW_CONFIG);
5998 matcher.history = b"abcde".to_vec();
5999 matcher.history_start = 0;
6000 matcher.history_abs_start = 0;
6001
6002 assert!(matcher.row_candidate(0, 0).is_none());
6003}
6004
6005#[test]
6006fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
6007 let mut hc = HcMatchGenerator::new(32);
6008 hc.table.history = b"abc".to_vec();
6009 hc.table.history_start = 0;
6010 hc.table.history_abs_start = 0;
6011 hc.table.ensure_tables();
6012
6013 let candidates = hc.hc.chain_candidates(&hc.table, 0);
6014 assert!(candidates.iter().all(|&pos| pos == usize::MAX));
6015}
6016
6017#[test]
6018fn hc_reset_refills_existing_tables_with_empty_sentinel() {
6019 let mut hc = HcMatchGenerator::new(32);
6020 hc.table.add_data(b"abcdeabcde".to_vec(), |_| {});
6021 hc.table.ensure_tables();
6022 assert!(!hc.table.hash_table.is_empty());
6023 assert!(!hc.table.chain_table.is_empty());
6024 hc.table.hash_table.fill(123);
6025 hc.table.chain_table.fill(456);
6026
6027 hc.reset(|_| {});
6028
6029 assert!(hc.table.hash_table.iter().all(|&v| v == HC_EMPTY));
6030 assert!(hc.table.chain_table.iter().all(|&v| v == HC_EMPTY));
6031}
6032
6033#[test]
6034fn hc_start_matching_returns_early_for_empty_current_block() {
6035 let mut hc = HcMatchGenerator::new(32);
6036 hc.table.add_data(Vec::new(), |_| {});
6037 let mut called = false;
6038 hc.start_matching(|_| called = true);
6039 assert!(!called, "empty current block should not emit sequences");
6040}
6041
6042#[cfg(test)]
6043fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
6044 let mut out = Vec::with_capacity(len);
6045 let mut state = seed;
6046 for _ in 0..len {
6047 state ^= state << 13;
6048 state ^= state >> 7;
6049 state ^= state << 17;
6050 out.push((state >> 40) as u8);
6051 }
6052 out
6053}
6054
6055#[cfg(test)]
6056fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
6057 let mut ranges = Vec::new();
6058 let mut cursor = 0usize;
6059 let mut savings = 0i64;
6060 while cursor < data.len() {
6061 let remaining = data.len() - cursor;
6062 let candidate_len = remaining.min(HC_BLOCKSIZE_MAX);
6063 let block_len = crate::encoding::frame_compressor::donor_optimal_block_size(
6064 CompressionLevel::Level(22),
6065 &data[cursor..cursor + candidate_len],
6066 remaining,
6067 HC_BLOCKSIZE_MAX,
6068 savings,
6069 )
6070 .min(candidate_len)
6071 .max(1);
6072 ranges.push((cursor, block_len));
6073 cursor += block_len;
6074 if cursor >= HC_BLOCKSIZE_MAX {
6078 savings = 3;
6079 }
6080 }
6081 ranges
6082}
6083
6084#[cfg(test)]
6085fn merge_block_delimiters_like_donor(
6086 sequences: Vec<(usize, usize, usize)>,
6087) -> Vec<(usize, usize, usize)> {
6088 let mut out = Vec::with_capacity(sequences.len());
6089 let mut pending_lits = 0usize;
6090 for (lit_len, offset, match_len) in sequences {
6091 if offset == 0 && match_len == 0 {
6092 pending_lits = pending_lits.saturating_add(lit_len);
6093 continue;
6094 }
6095 out.push((lit_len.saturating_add(pending_lits), offset, match_len));
6096 pending_lits = 0;
6097 }
6098 if pending_lits > 0 {
6099 out.push((pending_lits, 0, 0));
6100 }
6101 out
6102}
6103
6104#[cfg(test)]
6105fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6106 merge_block_delimiters_like_donor(collect_level22_sequences_with_delimiters(data))
6107 .into_iter()
6108 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6109 .collect()
6110}
6111
6112#[cfg(test)]
6113fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6114 let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
6115 driver.set_source_size_hint(data.len() as u64);
6116 driver.reset(CompressionLevel::Level(22));
6117
6118 let mut sequences = Vec::new();
6119 for (chunk_start, chunk_len) in level22_donor_block_ranges(data) {
6120 let chunk = &data[chunk_start..chunk_start + chunk_len];
6121 let mut space = driver.get_next_space();
6122 space[..chunk.len()].copy_from_slice(chunk);
6123 space.truncate(chunk.len());
6124 driver.commit_space(space);
6125 driver.start_matching(|seq| {
6126 let entry = match seq {
6127 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6128 Sequence::Triple {
6129 literals,
6130 offset,
6131 match_len,
6132 } => (literals.len(), offset, match_len),
6133 };
6134 sequences.push(entry);
6135 });
6136 }
6137 sequences
6138}
6139
6140#[cfg(test)]
6141fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6142 merge_block_delimiters_like_donor(donor_level22_sequences_with_delimiters(data))
6143 .into_iter()
6144 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6145 .collect()
6146}
6147
6148#[cfg(test)]
6149fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6150 use zstd::zstd_safe;
6151 use zstd::zstd_safe::zstd_sys;
6152
6153 fn assert_zstd_ok(code: usize, context: &str) {
6154 assert_eq!(
6155 unsafe { zstd_sys::ZSTD_isError(code) },
6156 0,
6157 "{context} failed: {}",
6158 zstd_safe::get_error_name(code)
6159 );
6160 }
6161
6162 unsafe {
6163 let cctx = zstd_sys::ZSTD_createCCtx();
6164 assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");
6165
6166 assert_zstd_ok(
6167 zstd_sys::ZSTD_CCtx_setParameter(
6168 cctx,
6169 zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
6170 22,
6171 ),
6172 "ZSTD_c_compressionLevel",
6173 );
6174
6175 let seq_capacity = zstd_safe::sequence_bound(data.len());
6176 let mut seqs = alloc::vec![
6177 zstd_sys::ZSTD_Sequence {
6178 offset: 0,
6179 litLength: 0,
6180 matchLength: 0,
6181 rep: 0,
6182 };
6183 seq_capacity
6184 ];
6185
6186 let seq_count = zstd_sys::ZSTD_generateSequences(
6187 cctx,
6188 seqs.as_mut_ptr(),
6189 seqs.len(),
6190 data.as_ptr().cast(),
6191 data.len(),
6192 );
6193 assert_zstd_ok(seq_count, "ZSTD_generateSequences");
6194 let rc = zstd_sys::ZSTD_freeCCtx(cctx);
6195 assert_eq!(rc, 0, "ZSTD_freeCCtx failed");
6196
6197 seqs.truncate(seq_count);
6198 seqs.into_iter()
6199 .map(|seq| {
6200 (
6201 seq.litLength as usize,
6202 seq.offset as usize,
6203 seq.matchLength as usize,
6204 )
6205 })
6206 .collect()
6207 }
6208}
6209
6210#[test]
6211fn level22_sequences_match_donor_on_corpus_proxy() {
6212 let data = include_bytes!("../../decodecorpus_files/z000033");
6213 assert_level22_sequences_match_donor(data);
6214}
6215
6216#[test]
6217fn level22_sequences_match_donor_on_small_corpus_proxy() {
6218 let data = include_bytes!("../../decodecorpus_files/z000030");
6219 assert_level22_sequences_match_donor(data);
6220}
6221
6222#[cfg(test)]
6223fn assert_level22_sequences_match_donor(data: &[u8]) {
6224 let rust = collect_level22_sequences(data);
6225 let donor = donor_level22_sequences(data);
6226
6227 if rust != donor {
6228 let first_diff = rust
6229 .iter()
6230 .zip(donor.iter())
6231 .position(|(lhs, rhs)| lhs != rhs)
6232 .unwrap_or_else(|| rust.len().min(donor.len()));
6233 let rust_pos = rust
6234 .iter()
6235 .take(first_diff)
6236 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6237 let donor_pos = donor
6238 .iter()
6239 .take(first_diff)
6240 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6241 let start = first_diff.saturating_sub(4);
6242 let rust_window = &rust[start..rust.len().min(first_diff + 4)];
6243 let donor_window = &donor[start..donor.len().min(first_diff + 4)];
6244 let mut reps = [1u32, 4, 8];
6245 for (lit_len, offset, _) in rust.iter().take(first_diff) {
6246 let _ = encode_offset_with_history(*offset as u32, *lit_len as u32, &mut reps);
6247 }
6248 panic!(
6249 "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
6250 first_diff,
6251 rust.get(first_diff),
6252 donor.get(first_diff),
6253 rust.len(),
6254 donor.len(),
6255 rust_pos,
6256 donor_pos,
6257 reps,
6258 rust_window,
6259 donor_window,
6260 level22_donor_block_ranges(data)
6261 .into_iter()
6262 .filter(|(start, len)| *start <= rust_pos && rust_pos < start + len)
6263 .collect::<Vec<_>>(),
6264 );
6265 }
6266}
6267
6268#[test]
6269fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
6270 let mut matcher = HcMatchGenerator::new(1 << 22);
6271 let tail = b"Qz9kLm2Rp";
6272 let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
6273 let tail_start = first.len() - tail.len();
6274 first[tail_start..].copy_from_slice(tail);
6275 matcher.table.add_data(first.clone(), |_| {});
6276 matcher.skip_matching(Some(true));
6277
6278 let mut second = tail.to_vec();
6279 second.extend_from_slice(b"after-tail-literals");
6280 matcher.table.add_data(second, |_| {});
6281
6282 let mut first_sequence = None;
6283 matcher.start_matching(|seq| {
6284 if first_sequence.is_some() {
6285 return;
6286 }
6287 first_sequence = Some(match seq {
6288 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6289 Sequence::Triple {
6290 literals,
6291 offset,
6292 match_len,
6293 } => (literals.len(), offset, match_len),
6294 });
6295 });
6296
6297 let (literals_len, offset, match_len) =
6298 first_sequence.expect("expected at least one sequence after sparse skip");
6299 assert_eq!(
6300 literals_len, 0,
6301 "first sequence should start at block boundary"
6302 );
6303 assert_eq!(
6304 offset,
6305 tail.len(),
6306 "first match should reference previous tail"
6307 );
6308 assert!(
6309 match_len >= tail.len(),
6310 "tail-aligned cross-block match must be preserved"
6311 );
6312}
6313
6314#[test]
6315fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
6316 let mut matcher = HcMatchGenerator::new(1 << 20);
6317 matcher.configure(
6318 BTULTRA2_HC_CONFIG_L22,
6319 super::strategy::StrategyTag::BtUltra2,
6320 20,
6321 );
6322 let tail = b"Bt9kLm2Rp";
6323 let mut first = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
6324 let tail_start = first.len() - tail.len();
6325 first[tail_start..].copy_from_slice(tail);
6326 matcher.table.add_data(first, |_| {});
6327 matcher.skip_matching(Some(true));
6328
6329 let mut second = tail.to_vec();
6330 second.extend_from_slice(b"after-tail-literals");
6331 matcher.table.add_data(second, |_| {});
6332
6333 let mut first_sequence = None;
6334 matcher.start_matching(|seq| {
6335 if first_sequence.is_some() {
6336 return;
6337 }
6338 first_sequence = Some(match seq {
6339 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6340 Sequence::Triple {
6341 literals,
6342 offset,
6343 match_len,
6344 } => (literals.len(), offset, match_len),
6345 });
6346 });
6347
6348 let (literals_len, offset, match_len) =
6349 first_sequence.expect("expected at least one sequence after sparse BT skip");
6350 assert_eq!(
6351 literals_len, 0,
6352 "BT sparse skip should preserve an immediate boundary match"
6353 );
6354 assert_eq!(
6355 offset,
6356 tail.len(),
6357 "first BT match should reference previous tail"
6358 );
6359 assert!(
6360 match_len >= tail.len(),
6361 "BT sparse skip must seed the dense tail for cross-block matching"
6362 );
6363}
6364
6365#[test]
6366fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
6367 let mut matcher = HcMatchGenerator::new(1 << 22);
6368 let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
6369 matcher.table.add_data(first.clone(), |_| {});
6370 matcher.skip_matching(Some(true));
6371
6372 let current_len = first.len();
6373 let current_abs_start =
6374 matcher.table.history_abs_start + matcher.table.window_size - current_len;
6375 let current_abs_end = current_abs_start + current_len;
6376 let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
6377 let tail_start = current_abs_end
6378 .saturating_sub(dense_tail)
6379 .max(matcher.table.history_abs_start)
6380 .max(current_abs_start);
6381
6382 let overlap_pos = (tail_start..current_abs_end)
6383 .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
6384 .expect("fixture should contain at least one sparse-grid overlap in dense tail");
6385
6386 let rel = matcher
6387 .table
6388 .relative_position(overlap_pos)
6389 .expect("overlap position should be representable as relative position");
6390 let chain_idx = rel as usize & ((1 << matcher.table.chain_log) - 1);
6391 assert_ne!(
6392 matcher.table.chain_table[chain_idx],
6393 rel + 1,
6394 "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
6395 );
6396}
6397
6398#[test]
6399fn hc_compact_history_drains_when_threshold_crossed() {
6400 let mut hc = HcMatchGenerator::new(8);
6401 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
6402 hc.table.history_start = 16;
6403 hc.table.compact_history();
6404 assert_eq!(hc.table.history_start, 0);
6405 assert_eq!(hc.table.history, b"qrstuvwxyz");
6406}
6407
6408#[test]
6409fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
6410 let mut hc = HcMatchGenerator::new(32);
6411 hc.table.history = b"abcdefghijklmnop".to_vec();
6412 hc.table.history_abs_start = 0;
6413 hc.table.position_base = 1;
6414 hc.table.ensure_tables();
6415 let before_hash = hc.table.hash_table.clone();
6416 let before_chain = hc.table.chain_table.clone();
6417
6418 hc.table.insert_position_no_rebase(0);
6419
6420 assert_eq!(hc.table.hash_table, before_hash);
6421 assert_eq!(hc.table.chain_table, before_chain);
6422}
6423
6424#[test]
6425fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
6426 let mut hc = HcMatchGenerator::new(64);
6427 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
6428 hc.table.history_start = 0;
6429 hc.table.history_abs_start = 0;
6430 hc.table.position_base = 0;
6431 hc.table.ensure_tables();
6432 hc.table.next_to_update3 = 0;
6433
6434 hc.table.insert_positions(0, 9);
6435
6436 assert_eq!(
6437 hc.table.next_to_update3, 9,
6438 "contiguous insert_positions should advance hash3 update cursor"
6439 );
6440}
6441
6442#[test]
6443fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
6444 let mut hc = HcMatchGenerator::new(64);
6445 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
6446 hc.table.history_start = 0;
6447 hc.table.history_abs_start = 0;
6448 hc.table.position_base = 0;
6449 hc.table.ensure_tables();
6450 hc.table.next_to_update3 = 0;
6451
6452 hc.table.insert_positions_with_step(0, 16, 4);
6453
6454 assert_eq!(
6455 hc.table.next_to_update3, 0,
6456 "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
6457 );
6458}
6459
6460#[test]
6461fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
6462 let mut driver = MatchGeneratorDriver::new(8, 1);
6463 driver.reset(CompressionLevel::Fastest);
6464 driver.simple_mut().max_window_size = 8;
6467 driver.reported_window_size = 8;
6468
6469 let base_window = driver.simple_mut().max_window_size;
6470 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6471 assert_eq!(driver.simple_mut().max_window_size, base_window + 24);
6472
6473 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6474 let mut space = driver.get_next_space();
6475 space.clear();
6476 space.extend_from_slice(block);
6477 driver.commit_space(space);
6478 driver.skip_matching_with_hint(None);
6479 }
6480
6481 assert_eq!(
6482 driver.dictionary_retained_budget, 0,
6483 "dictionary budget should be fully retired once primed dict slices are evicted"
6484 );
6485 assert_eq!(
6486 driver.simple_mut().max_window_size,
6487 base_window,
6488 "retired dictionary budget must not remain reusable for live history"
6489 );
6490}
6491
6492#[test]
6493fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
6494 let mut driver = MatchGeneratorDriver::new(8, 1);
6495 driver.reset(CompressionLevel::Level(2));
6496 driver.dfast_matcher_mut().max_window_size = 8;
6499 driver.reported_window_size = 8;
6500
6501 let base_window = driver.dfast_matcher().max_window_size;
6502 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6503 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
6504
6505 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6506 let mut space = driver.get_next_space();
6507 space.clear();
6508 space.extend_from_slice(block);
6509 driver.commit_space(space);
6510 driver.skip_matching_with_hint(None);
6511 }
6512
6513 assert_eq!(
6514 driver.dictionary_retained_budget, 0,
6515 "dictionary budget should be fully retired once primed dict slices are evicted"
6516 );
6517 assert_eq!(
6518 driver.dfast_matcher().max_window_size,
6519 base_window,
6520 "retired dictionary budget must not remain reusable for live history"
6521 );
6522}
6523
6524#[test]
6525fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
6526 let mut driver = MatchGeneratorDriver::new(8, 1);
6527 driver.reset(CompressionLevel::Better);
6528
6529 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6530
6531 let mut space = driver.get_next_space();
6532 space.clear();
6533 space.extend_from_slice(b"abcdefgh");
6536 driver.commit_space(space);
6537
6538 let mut saw_match = false;
6539 driver.start_matching(|seq| {
6540 if let Sequence::Triple {
6541 literals,
6542 offset,
6543 match_len,
6544 } = seq
6545 && literals.is_empty()
6546 && offset == 8
6547 && match_len >= HC_MIN_MATCH_LEN
6548 {
6549 saw_match = true;
6550 }
6551 });
6552
6553 assert!(
6554 saw_match,
6555 "hash-chain backend should match dictionary-primed history in first full block"
6556 );
6557}
6558
6559#[test]
6560fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
6561 let mut driver = MatchGeneratorDriver::new(8, 1);
6562 driver.reset(CompressionLevel::Better);
6563 driver.hc_matcher_mut().table.max_window_size = 8;
6565 driver.reported_window_size = 8;
6566
6567 let base_window = driver.hc_matcher().table.max_window_size;
6568 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6569 assert_eq!(driver.hc_matcher().table.max_window_size, base_window + 24);
6570
6571 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6572 let mut space = driver.get_next_space();
6573 space.clear();
6574 space.extend_from_slice(block);
6575 driver.commit_space(space);
6576 driver.skip_matching_with_hint(None);
6577 }
6578
6579 assert_eq!(
6580 driver.dictionary_retained_budget, 0,
6581 "dictionary budget should be fully retired once primed dict slices are evicted"
6582 );
6583 assert_eq!(
6584 driver.hc_matcher().table.max_window_size,
6585 base_window,
6586 "retired dictionary budget must not remain reusable for live history"
6587 );
6588}
6589
6590#[test]
6591fn hc_rebases_positions_after_u32_boundary() {
6592 let mut matcher = HcMatchGenerator::new(64);
6593 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
6594 matcher.table.ensure_tables();
6595 matcher.table.position_base = 0;
6596 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
6597 Ok(value) => value,
6598 Err(_) => return,
6599 };
6600 matcher.table.history_abs_start = history_abs_start;
6603 matcher.skip_matching(None);
6604 assert_eq!(
6605 matcher.table.position_base, matcher.table.history_abs_start,
6606 "rebase should anchor to the oldest live absolute position"
6607 );
6608
6609 assert!(
6610 matcher
6611 .table
6612 .hash_table
6613 .iter()
6614 .any(|entry| *entry != HC_EMPTY),
6615 "HC hash table should still be populated after crossing u32 boundary"
6616 );
6617
6618 let abs_pos = matcher.table.history_abs_start + 10;
6620 let candidates = matcher.hc.chain_candidates(&matcher.table, abs_pos);
6621 assert!(
6622 candidates.iter().any(|candidate| *candidate != usize::MAX),
6623 "chain_candidates should return valid matches after rebase"
6624 );
6625}
6626
6627#[test]
6628fn hc_rebase_rebuilds_only_inserted_prefix() {
6629 let mut matcher = HcMatchGenerator::new(64);
6630 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
6631 matcher.table.ensure_tables();
6632 matcher.table.position_base = 0;
6633 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
6634 Ok(value) => value,
6635 Err(_) => return,
6636 };
6637 matcher.table.history_abs_start = history_abs_start;
6638 let abs_pos = matcher.table.history_abs_start + 6;
6639
6640 let mut expected = HcMatchGenerator::new(64);
6641 expected.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
6642 expected.table.ensure_tables();
6643 expected.table.history_abs_start = history_abs_start;
6644 expected.table.position_base = expected.table.history_abs_start;
6645 expected.table.hash_table.fill(HC_EMPTY);
6646 expected.table.chain_table.fill(HC_EMPTY);
6647 for pos in expected.table.history_abs_start..abs_pos {
6648 expected.table.insert_position_no_rebase(pos);
6649 }
6650
6651 matcher.table.maybe_rebase_positions(abs_pos);
6652
6653 assert_eq!(
6654 matcher.table.position_base, matcher.table.history_abs_start,
6655 "rebase should still anchor to the oldest live absolute position"
6656 );
6657 assert_eq!(
6658 matcher.table.hash_table, expected.table.hash_table,
6659 "rebase must rebuild only positions already inserted before abs_pos"
6660 );
6661 assert_eq!(
6662 matcher.table.chain_table, expected.table.chain_table,
6663 "future positions must not be pre-seeded into HC chains during rebase"
6664 );
6665}
6666
6667#[test]
6668fn suffix_store_with_single_slot_does_not_panic_on_keying() {
6669 let mut suffixes = SuffixStore::with_capacity(1);
6670 suffixes.insert(b"abcde", 0);
6671 assert!(suffixes.contains_key(b"abcde"));
6672 assert_eq!(suffixes.get(b"abcde"), Some(0));
6673}
6674
6675#[test]
6676fn fastest_reset_uses_interleaved_hash_fill_step() {
6677 let mut driver = MatchGeneratorDriver::new(32, 2);
6678
6679 driver.reset(CompressionLevel::Uncompressed);
6680 assert_eq!(driver.simple().hash_fill_step, 1);
6681
6682 driver.reset(CompressionLevel::Fastest);
6683 assert_eq!(driver.simple().hash_fill_step, FAST_HASH_FILL_STEP);
6684
6685 driver.reset(CompressionLevel::Better);
6688 assert_eq!(
6689 driver.active_backend(),
6690 super::strategy::BackendTag::HashChain
6691 );
6692 assert_eq!(driver.window_size(), (1u64 << 23));
6693 assert_eq!(driver.hc_matcher().hc.lazy_depth, 2);
6694}
6695
6696#[test]
6697fn simple_matcher_updates_offset_history_after_emitting_match() {
6698 let mut matcher = MatchGenerator::new(64);
6699 matcher.add_data(
6700 b"abcdeabcdeabcde".to_vec(),
6701 SuffixStore::with_capacity(64),
6702 |_, _| {},
6703 );
6704
6705 assert!(matcher.next_sequence(|seq| {
6706 assert_eq!(
6707 seq,
6708 Sequence::Triple {
6709 literals: b"abcde",
6710 offset: 5,
6711 match_len: 10,
6712 }
6713 );
6714 }));
6715 assert_eq!(matcher.offset_hist, [5, 1, 4]);
6716}
6717
6718#[test]
6719fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
6720 let mut matcher = MatchGenerator::new(64);
6721 matcher.add_data(
6722 b"abcdefghijabcdefghij".to_vec(),
6723 SuffixStore::with_capacity(64),
6724 |_, _| {},
6725 );
6726
6727 matcher.suffix_idx = 10;
6728 matcher.last_idx_in_sequence = 10;
6729 matcher.offset_hist = [99, 10, 4];
6730
6731 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
6732 assert_eq!(candidate, Some((10, 10)));
6733}
6734
6735#[test]
6736fn simple_matcher_repcode_can_target_previous_window_entry() {
6737 let mut matcher = MatchGenerator::new(64);
6738 matcher.add_data(
6739 b"abcdefghij".to_vec(),
6740 SuffixStore::with_capacity(64),
6741 |_, _| {},
6742 );
6743 matcher.skip_matching();
6744 matcher.add_data(
6745 b"abcdefghij".to_vec(),
6746 SuffixStore::with_capacity(64),
6747 |_, _| {},
6748 );
6749
6750 matcher.offset_hist = [99, 10, 4];
6751
6752 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
6753 assert_eq!(candidate, Some((10, 10)));
6754}
6755
6756#[test]
6757fn simple_matcher_zero_literal_repcode_checks_rep2() {
6758 let mut matcher = MatchGenerator::new(64);
6759 matcher.add_data(
6760 b"abcdefghijabcdefghij".to_vec(),
6761 SuffixStore::with_capacity(64),
6762 |_, _| {},
6763 );
6764 matcher.suffix_idx = 10;
6765 matcher.last_idx_in_sequence = 10;
6766 matcher.offset_hist = [99, 4, 10];
6768
6769 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
6770 assert_eq!(candidate, Some((10, 10)));
6771}
6772
6773#[test]
6774fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
6775 let mut matcher = MatchGenerator::new(64);
6776 matcher.add_data(
6777 b"abcdefghijabcdefghij".to_vec(),
6778 SuffixStore::with_capacity(64),
6779 |_, _| {},
6780 );
6781 matcher.suffix_idx = 10;
6782 matcher.last_idx_in_sequence = 10;
6783 matcher.offset_hist = [11, 4, 99];
6785
6786 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
6787 assert_eq!(candidate, Some((10, 10)));
6788}
6789
6790#[test]
6791fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
6792 let mut matcher = MatchGenerator::new(64);
6793 matcher.add_data(
6794 b"abcdefghij".to_vec(),
6795 SuffixStore::with_capacity(64),
6796 |_, _| {},
6797 );
6798 matcher.skip_matching();
6799 matcher.add_data(
6800 b"klmnopqrst".to_vec(),
6801 SuffixStore::with_capacity(64),
6802 |_, _| {},
6803 );
6804 matcher.suffix_idx = 3;
6805
6806 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
6807 assert_eq!(candidate, None);
6808}
6809
6810#[test]
6811fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
6812 let mut matcher = MatchGenerator::new(64);
6813 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
6814 matcher.add_data(
6815 b"abcdefghijklmnop".to_vec(),
6816 SuffixStore::with_capacity(64),
6817 |_, _| {},
6818 );
6819 matcher.skip_matching();
6820 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
6821
6822 assert!(matcher.next_sequence(|seq| {
6823 assert_eq!(
6824 seq,
6825 Sequence::Triple {
6826 literals: b"",
6827 offset: 15,
6828 match_len: 5,
6829 }
6830 );
6831 }));
6832 assert!(!matcher.next_sequence(|_| {}));
6833}
6834
6835#[test]
6836fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
6837 let mut matcher = MatchGenerator::new(128);
6838 let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
6839 let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
6840 let tail_start = first.len() - MIN_MATCH_LEN;
6841 let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
6842 matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});
6843
6844 matcher.skip_matching_with_hint(Some(true));
6845
6846 matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
6848 let mut sparse_first_is_literals = None;
6849 assert!(matcher.next_sequence(|seq| {
6850 if sparse_first_is_literals.is_none() {
6851 sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
6852 }
6853 }));
6854 assert!(
6855 sparse_first_is_literals.unwrap_or(false),
6856 "sparse-start probe should not produce an immediate match"
6857 );
6858
6859 let mut matcher = MatchGenerator::new(128);
6861 matcher.add_data(
6862 b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
6863 SuffixStore::with_capacity(256),
6864 |_, _| {},
6865 );
6866 matcher.skip_matching_with_hint(Some(true));
6867 matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
6868 let mut tail_first_is_immediate_match = None;
6869 assert!(matcher.next_sequence(|seq| {
6870 if tail_first_is_immediate_match.is_none() {
6871 tail_first_is_immediate_match =
6872 Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
6873 }
6874 }));
6875 assert!(
6876 tail_first_is_immediate_match.unwrap_or(false),
6877 "dense tail probe should match immediately at block start"
6878 );
6879}
6880
6881#[test]
6882fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
6883 let mut matcher = MatchGenerator::new(64);
6884 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
6885 matcher.add_data(
6886 b"01234abcde".to_vec(),
6887 SuffixStore::with_capacity(64),
6888 |_, _| {},
6889 );
6890 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
6891
6892 let last = matcher.window.last().unwrap();
6893 let tail = &last.data[5..10];
6894 assert_eq!(last.suffixes.get(tail), Some(5));
6895}
6896
6897#[test]
6898fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
6899 let mut matcher = MatchGenerator::new(128);
6900 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
6901 matcher.add_data(
6902 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
6903 SuffixStore::with_capacity(1 << 16),
6904 |_, _| {},
6905 );
6906
6907 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
6908
6909 let last = matcher.window.last().unwrap();
6910 let first_key = &last.data[..MIN_MATCH_LEN];
6911 assert_eq!(last.suffixes.get(first_key), None);
6912}
6913
6914#[test]
6915fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
6916 let mut matcher = MatchGenerator::new(128);
6917 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
6918 matcher.add_data(
6919 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
6920 SuffixStore::with_capacity(1 << 16),
6921 |_, _| {},
6922 );
6923
6924 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
6925
6926 let last = matcher.window.last().unwrap();
6927 for pos in [0usize, 3, 6, 9, 12] {
6928 let key = &last.data[pos..pos + MIN_MATCH_LEN];
6929 assert_eq!(
6930 last.suffixes.get(key),
6931 Some(pos),
6932 "expected interleaved suffix registration at pos {pos}"
6933 );
6934 }
6935}
6936
6937#[test]
6938fn dfast_skip_matching_handles_window_eviction() {
6939 let mut matcher = DfastMatchGenerator::new(16);
6940
6941 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
6942 matcher.skip_matching(None);
6943 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
6944 matcher.skip_matching(None);
6945 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
6946
6947 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
6948 matcher.start_matching(|seq| match seq {
6949 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
6950 Sequence::Triple {
6951 literals,
6952 offset,
6953 match_len,
6954 } => {
6955 reconstructed.extend_from_slice(literals);
6956 let start = reconstructed.len() - offset;
6957 for i in 0..match_len {
6958 let byte = reconstructed[start + i];
6959 reconstructed.push(byte);
6960 }
6961 }
6962 });
6963
6964 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
6965}
6966
6967#[test]
6968fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
6969 let mut matcher = DfastMatchGenerator::new(8);
6970
6971 let mut first = Vec::with_capacity(64);
6972 first.extend_from_slice(b"abcdefgh");
6973 matcher.add_data(first, |_| {});
6974
6975 let mut second = Vec::with_capacity(64);
6976 second.extend_from_slice(b"ijklmnop");
6977
6978 let mut observed_evicted_len = None;
6979 matcher.add_data(second, |data| {
6980 observed_evicted_len = Some(data.len());
6981 });
6982
6983 assert_eq!(
6984 observed_evicted_len,
6985 Some(8),
6986 "eviction callback must report evicted byte length, not backing capacity"
6987 );
6988}
6989
6990#[test]
7054fn dfast_commit_space_eviction_uses_window_size_delta() {
7055 use crate::encoding::CompressionLevel;
7056
7057 let mut driver = MatchGeneratorDriver::new(10, 1);
7058 driver.reset(CompressionLevel::Level(2));
7059 assert!(matches!(driver.storage, MatcherStorage::Dfast(_)));
7060
7061 driver.dfast_matcher_mut().max_window_size = 10;
7066 driver.dictionary_retained_budget = 100;
7067
7068 let mut space1 = Vec::with_capacity(64);
7069 space1.extend_from_slice(b"abcd");
7070 driver.commit_space(space1);
7071 assert_eq!(
7072 driver.dictionary_retained_budget, 100,
7073 "1st commit fills window 0 → 4, no eviction, no retire"
7074 );
7075
7076 let mut space2 = Vec::with_capacity(64);
7077 space2.extend_from_slice(b"efgh");
7078 driver.commit_space(space2);
7079 assert_eq!(
7080 driver.dictionary_retained_budget, 100,
7081 "2nd commit fills window 4 → 8, no eviction, no retire"
7082 );
7083
7084 let mut space3 = Vec::with_capacity(64);
7085 space3.extend_from_slice(b"ijklm");
7086 driver.commit_space(space3);
7087 assert_eq!(
7088 driver.dictionary_retained_budget, 87,
7089 "3rd commit + trim_after_budget_retire cascade. With the fix \
7090 (evicted=4 from window_size delta) the cascade reclaims 100 \
7091 → 96 → 92 → 87. With the bug (evicted=5 from data.len()) the \
7092 3rd commit would panic on `data.len() <= max_window_size` \
7093 after the 2nd commit's cascade had already shrunk \
7094 max_window_size to 0."
7095 );
7096 assert_eq!(
7097 driver.dfast_matcher_mut().max_window_size,
7098 0,
7099 "cascade drains max_window_size to 0 once budget reclaim \
7100 exceeds the initial window size"
7101 );
7102}
7103
7104#[test]
7105fn dfast_trim_to_window_evicts_oldest_block_by_length() {
7106 let mut matcher = DfastMatchGenerator::new(16);
7115
7116 let mut first = Vec::with_capacity(64);
7117 first.extend_from_slice(b"abcdefgh");
7118 matcher.add_data(first, |_| {});
7119
7120 let mut second = Vec::with_capacity(64);
7121 second.extend_from_slice(b"ijklmnop");
7122 matcher.add_data(second, |_| {});
7123
7124 assert_eq!(matcher.window_size, 16);
7125 assert_eq!(matcher.window_blocks.len(), 2);
7126
7127 matcher.max_window_size = 8;
7128
7129 matcher.trim_to_window();
7130
7131 assert_eq!(
7139 matcher.window_size, 8,
7140 "exactly one 8-byte block must remain"
7141 );
7142 assert_eq!(matcher.window_blocks.len(), 1);
7143 assert_eq!(matcher.history_abs_start, 8);
7144}
7145
7146#[test]
7147fn dfast_inserts_tail_positions_for_next_block_matching() {
7148 let mut matcher = DfastMatchGenerator::new(1 << 22);
7149
7150 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
7151 let mut history = Vec::new();
7152 matcher.start_matching(|seq| match seq {
7153 Sequence::Literals { literals } => history.extend_from_slice(literals),
7154 Sequence::Triple { .. } => unreachable!("first block should not match history"),
7155 });
7156 assert_eq!(history, b"012345bcdea");
7157
7158 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
7159 let mut saw_first_sequence = false;
7160 matcher.start_matching(|seq| {
7161 assert!(!saw_first_sequence, "expected a single cross-block match");
7162 saw_first_sequence = true;
7163 match seq {
7164 Sequence::Literals { .. } => {
7165 panic!("expected tail-anchored cross-block match before any literals")
7166 }
7167 Sequence::Triple {
7168 literals,
7169 offset,
7170 match_len,
7171 } => {
7172 assert_eq!(literals, b"");
7173 assert_eq!(offset, 5);
7174 assert_eq!(match_len, 11);
7175 let start = history.len() - offset;
7176 for i in 0..match_len {
7177 let byte = history[start + i];
7178 history.push(byte);
7179 }
7180 }
7181 }
7182 });
7183
7184 assert!(
7185 saw_first_sequence,
7186 "expected tail-anchored cross-block match"
7187 );
7188 assert_eq!(history, b"012345bcdeabcdeabcdeab");
7189}
7190
7191#[test]
7192fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
7193 let mut matcher = DfastMatchGenerator::new(1 << 22);
7194 let tail = b"Qz9kLm2Rp";
7195 let mut first = b"0123456789abcdef".to_vec();
7196 first.extend_from_slice(tail);
7197 matcher.add_data(first.clone(), |_| {});
7198 matcher.skip_matching(Some(false));
7199
7200 let mut second = tail.to_vec();
7201 second.extend_from_slice(b"after-tail-literals");
7202 matcher.add_data(second, |_| {});
7203
7204 let mut first_sequence = None;
7205 matcher.start_matching(|seq| {
7206 if first_sequence.is_some() {
7207 return;
7208 }
7209 first_sequence = Some(match seq {
7210 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7211 Sequence::Triple {
7212 literals,
7213 offset,
7214 match_len,
7215 } => (literals.len(), offset, match_len),
7216 });
7217 });
7218
7219 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7220 assert_eq!(
7221 lit_len, 0,
7222 "expected immediate cross-block match at block start"
7223 );
7224 assert_eq!(
7225 offset,
7226 tail.len(),
7227 "expected dense skip to preserve cross-boundary tail match"
7228 );
7229 assert!(
7230 match_len >= DFAST_MIN_MATCH_LEN,
7231 "match length should satisfy dfast minimum match length"
7232 );
7233}
7234
7235#[test]
7236fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
7237 let mut matcher = DfastMatchGenerator::new(1 << 22);
7238 let tail = b"Qz9kLm2Rp";
7239 let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
7240 let tail_start = first.len() - tail.len();
7241 first[tail_start..].copy_from_slice(tail);
7242 matcher.add_data(first.clone(), |_| {});
7243
7244 matcher.skip_matching(Some(true));
7245
7246 let mut second = tail.to_vec();
7247 second.extend_from_slice(b"after-tail-literals");
7248 matcher.add_data(second, |_| {});
7249
7250 let mut first_sequence = None;
7251 matcher.start_matching(|seq| {
7252 if first_sequence.is_some() {
7253 return;
7254 }
7255 first_sequence = Some(match seq {
7256 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7257 Sequence::Triple {
7258 literals,
7259 offset,
7260 match_len,
7261 } => (literals.len(), offset, match_len),
7262 });
7263 });
7264
7265 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7266 assert_eq!(
7267 lit_len, 0,
7268 "expected immediate cross-block match at block start"
7269 );
7270 assert_eq!(
7271 offset,
7272 tail.len(),
7273 "expected match against densely seeded tail"
7274 );
7275 assert!(
7276 match_len >= DFAST_MIN_MATCH_LEN,
7277 "match length should satisfy dfast minimum match length"
7278 );
7279}
7280
7281#[test]
7282fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
7283 let mut matcher = DfastMatchGenerator::new(1 << 22);
7284 let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
7285 let first_len = first.len();
7286 matcher.add_data(first, |_| {});
7287 matcher.skip_matching_dense();
7288
7289 matcher.add_data(alloc::vec![0xAB], |_| {});
7292 matcher.skip_matching_dense();
7293
7294 let target_abs_pos = first_len - 7;
7295 let target_rel = target_abs_pos - matcher.history_abs_start;
7296 let live = matcher.live_history();
7297 assert!(
7298 target_rel + 8 <= live.len(),
7299 "fixture must make the boundary start long-hashable"
7300 );
7301 let long_hash = matcher.long_hash_index(&live[target_rel..]);
7302 let target_slot = matcher.pack_slot(target_abs_pos);
7303 assert_ne!(
7306 target_slot, DFAST_EMPTY_SLOT,
7307 "pack_slot must never return the empty-slot sentinel for a real position"
7308 );
7309 assert_eq!(
7310 matcher.long_hash[long_hash], target_slot,
7311 "dense skip must seed long-hash entry for newly hashable boundary start"
7312 );
7313}
7314
7315#[test]
7316fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
7317 let mut matcher = DfastMatchGenerator::new(1 << 20);
7318 let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
7319 matcher.add_data(block, |_| {});
7320 matcher.ensure_hash_tables();
7321
7322 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
7323 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
7324 let seed_start = current_len - DFAST_MIN_MATCH_LEN;
7325 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, seed_start);
7326
7327 let target_abs_pos = current_abs_start + current_len - 4;
7328 let target_rel = target_abs_pos - matcher.history_abs_start;
7329 let live = matcher.live_history();
7330 assert!(
7331 target_rel + 4 <= live.len(),
7332 "fixture must leave the last short-hash start valid"
7333 );
7334 let short_hash = matcher.short_hash_index(&live[target_rel..]);
7335 let target_slot = matcher.pack_slot(target_abs_pos);
7336 assert_ne!(
7337 target_slot, DFAST_EMPTY_SLOT,
7338 "pack_slot must never return the empty-slot sentinel for a real position"
7339 );
7340 assert_eq!(
7341 matcher.short_hash[short_hash], target_slot,
7342 "tail seeding must include the last 4-byte-hashable start"
7343 );
7344}
7345
7346#[test]
7347fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
7348 let mut matcher = DfastMatchGenerator::new(1 << 20);
7349 let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
7350 matcher.add_data(block, |_| {});
7351 matcher.ensure_hash_tables();
7352
7353 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
7354 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
7355 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, current_len);
7356
7357 let target_abs_pos = current_abs_start + current_len - 4;
7358 let target_rel = target_abs_pos - matcher.history_abs_start;
7359 let live = matcher.live_history();
7360 assert!(
7361 target_rel + 4 <= live.len(),
7362 "fixture must leave the last short-hash start valid"
7363 );
7364 let short_hash = matcher.short_hash_index(&live[target_rel..]);
7365 let target_slot = matcher.pack_slot(target_abs_pos);
7366 assert_ne!(
7367 target_slot, DFAST_EMPTY_SLOT,
7368 "pack_slot must never return the empty-slot sentinel for a real position"
7369 );
7370 assert_eq!(
7371 matcher.short_hash[short_hash], target_slot,
7372 "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
7373 );
7374}
7375
7376#[test]
7392fn dfast_ensure_room_for_rebases_above_guard_band() {
7393 let mut dfast = DfastMatchGenerator::new(1 << 22);
7394 dfast.set_hash_bits(10);
7395 dfast.ensure_hash_tables();
7396
7397 let early_abs = 1024usize;
7405 let early_packed = dfast.pack_slot(early_abs);
7406 assert_ne!(early_packed, DFAST_EMPTY_SLOT);
7407 dfast.short_hash[0] = early_packed;
7408 dfast.long_hash[0] = early_packed;
7409
7410 let trigger_abs = (u32::MAX as usize) - (DFAST_REBASE_GUARD_BAND as usize) + 1;
7416 assert_eq!(dfast.position_base, 0);
7417 dfast.ensure_room_for(trigger_abs);
7418 assert_eq!(
7419 dfast.position_base, DFAST_REBASE_GUARD_BAND as usize,
7420 "rebase must advance position_base by DFAST_REBASE_GUARD_BAND"
7421 );
7422
7423 assert_eq!(
7429 dfast.short_hash[0], DFAST_EMPTY_SLOT,
7430 "pre-rebase short-hash entries below the reducer must become empty"
7431 );
7432 assert_eq!(
7433 dfast.long_hash[0], DFAST_EMPTY_SLOT,
7434 "pre-rebase long-hash entries below the reducer must become empty"
7435 );
7436
7437 let post_packed = dfast.pack_slot(trigger_abs);
7441 assert_ne!(post_packed, DFAST_EMPTY_SLOT);
7442 let unpacked = dfast.position_base + (post_packed as usize) - 1;
7443 assert_eq!(
7444 unpacked, trigger_abs,
7445 "post-rebase pack/unpack must round-trip the absolute position"
7446 );
7447}
7448
7449#[test]
7450fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
7451 let mut matcher = DfastMatchGenerator::new(1 << 22);
7452 let boundary_prefix = [0xFA, 0xFB, 0xFC];
7453 let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
7454
7455 let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
7456 let first_tail_start = first.len() - boundary_prefix.len();
7457 first[first_tail_start..].copy_from_slice(&boundary_prefix);
7458 matcher.add_data(first, |_| {});
7459 matcher.skip_matching(Some(true));
7460
7461 let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
7462 second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
7463 matcher.add_data(second.clone(), |_| {});
7464 matcher.skip_matching(Some(true));
7465
7466 let mut third = boundary_prefix.to_vec();
7467 third.extend_from_slice(&boundary_suffix);
7468 third.extend_from_slice(b"-trailing-literals");
7469 matcher.add_data(third, |_| {});
7470
7471 let mut first_sequence = None;
7472 matcher.start_matching(|seq| {
7473 if first_sequence.is_some() {
7474 return;
7475 }
7476 first_sequence = Some(match seq {
7477 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7478 Sequence::Triple {
7479 literals,
7480 offset,
7481 match_len,
7482 } => (literals.len(), offset, match_len),
7483 });
7484 });
7485
7486 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7487 assert_eq!(
7488 lit_len, 0,
7489 "expected immediate match from the prior sparse-skip boundary"
7490 );
7491 assert_eq!(
7492 offset,
7493 second.len() + boundary_prefix.len(),
7494 "expected match against backfilled first→second boundary start"
7495 );
7496 assert!(
7497 match_len >= DFAST_MIN_MATCH_LEN,
7498 "match length should satisfy dfast minimum match length"
7499 );
7500}
7501
7502#[test]
7503fn fastest_hint_iteration_23_sequences_reconstruct_source() {
7504 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
7505 let mut state = seed;
7506 let mut data = Vec::with_capacity(len);
7507 for _ in 0..len {
7508 state = state
7509 .wrapping_mul(6364136223846793005)
7510 .wrapping_add(1442695040888963407);
7511 data.push((state >> 33) as u8);
7512 }
7513 data
7514 }
7515
7516 let i = 23u64;
7517 let len = (i * 89 % 16384) as usize;
7518 let mut data = generate_data(i, len);
7519 let repeat = data[128..256].to_vec();
7522 data.extend_from_slice(&repeat);
7523 data.extend_from_slice(&repeat);
7524
7525 let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
7526 driver.set_source_size_hint(data.len() as u64);
7527 driver.reset(CompressionLevel::Fastest);
7528 let mut space = driver.get_next_space();
7529 space[..data.len()].copy_from_slice(&data);
7530 space.truncate(data.len());
7531 driver.commit_space(space);
7532
7533 let mut rebuilt = Vec::with_capacity(data.len());
7534 let mut saw_triple = false;
7535 driver.start_matching(|seq| match seq {
7536 Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
7537 Sequence::Triple {
7538 literals,
7539 offset,
7540 match_len,
7541 } => {
7542 saw_triple = true;
7543 rebuilt.extend_from_slice(literals);
7544 assert!(offset > 0, "offset must be non-zero");
7545 assert!(
7546 offset <= rebuilt.len(),
7547 "offset must reference already-produced bytes: offset={} produced={}",
7548 offset,
7549 rebuilt.len()
7550 );
7551 let start = rebuilt.len() - offset;
7552 for idx in 0..match_len {
7553 let b = rebuilt[start + idx];
7554 rebuilt.push(b);
7555 }
7556 }
7557 });
7558
7559 assert!(saw_triple, "fixture must emit at least one match");
7560 assert_eq!(rebuilt, data);
7561}