1use alloc::vec::Vec;
9use super::CompressionLevel;
13use super::Matcher;
14use super::Sequence;
15use super::blocks::encode_offset_with_history;
16use super::bt::BtMatcher;
17#[cfg(test)]
18use super::cost_model::HC_MAX_LIT;
19use super::cost_model::{
20 HC_BITCOST_MULTIPLIER, HC_FORMAT_MINMATCH, HC_OPT_NODE_LEN, HC_OPT_NUM, HC_OPT_PRICE_ARENA_LEN,
21 HC_OPT_PRICE_STRIDE, HC_PREDEF_THRESHOLD, HcOptState, HcOptimalCostProfile,
22};
23#[cfg(test)]
24use super::cost_model::{HC_BLOCKSIZE_MAX, HC_MAX_LL, HC_MAX_ML, HC_MAX_OFF, HcOptPriceType};
25use super::dfast::DfastMatchGenerator;
26#[cfg(test)]
32use super::match_table::helpers::INCOMPRESSIBLE_SKIP_STEP;
33use super::match_table::helpers::MIN_MATCH_LEN;
34#[cfg(test)]
35use super::match_table::helpers::common_prefix_len;
36#[cfg(test)]
37use super::opt::ldm::HcRawSeq;
38use super::opt::ldm::{HcOptLdmState, HcRawSeqStore};
39use super::opt::types::{
40 HcCandidateQuery, HcOptimalNode, HcOptimalPlanBuffers, HcOptimalPlanState, HcOptimalSequence,
41 MatchCandidate,
42};
43use super::row::RowMatchGenerator;
44use super::simple::fast_matcher::{FAST_LEVEL_1_HASH_LOG, FAST_LEVEL_1_MLS, FastKernelMatcher};
45#[cfg(all(
46 test,
47 feature = "std",
48 target_arch = "aarch64",
49 target_endian = "little"
50))]
51use std::arch::is_aarch64_feature_detected;
52#[cfg(all(test, feature = "std", target_arch = "x86_64"))]
53use std::arch::is_x86_feature_detected;
54
// Tuning constants for the double-fast ("dfast") and row-hash matchers.
// Most consumers live outside this part of the file, so notes marked
// NOTE(review) are inferred from names only and should be confirmed.

/// NOTE(review): presumably the shortest match the dfast matcher emits — confirm at use sites.
pub(crate) const DFAST_MIN_MATCH_LEN: usize = 5;
/// NOTE(review): presumably a lookahead distance for the dfast short hash — confirm.
pub(crate) const DFAST_SHORT_HASH_LOOKAHEAD: usize = 5;
/// Minimum match length of the row matcher; every `ROW_L*` preset uses it as `mls`.
pub(crate) const ROW_MIN_MATCH_LEN: usize = 5;
/// NOTE(review): presumably the default log2 size of the dfast long hash table — confirm.
pub(crate) const DFAST_HASH_BITS: usize = 17;
/// NOTE(review): presumably how many bits smaller the short table is than the long one — confirm.
pub(crate) const DFAST_SHORT_HASH_BITS_DELTA: usize = 1;
/// NOTE(review): presumably the sentinel stored in unused dfast table slots — confirm.
pub(crate) const DFAST_EMPTY_SLOT: u32 = 0;

/// `1 << 30`. NOTE(review): presumably a safety margin before position rebasing — confirm.
pub(crate) const DFAST_REBASE_GUARD_BAND: u32 = 1u32 << 30;
/// Exponent from which `DFAST_SKIP_STEP_GROWTH_INTERVAL` is derived.
pub(crate) const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
/// `1 << DFAST_SKIP_SEARCH_STRENGTH`, i.e. 64.
pub(crate) const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
/// NOTE(review): presumably the upper bound on the adaptive skip step — confirm.
pub(crate) const DFAST_MAX_SKIP_STEP: usize = 8;
/// NOTE(review): presumably the skip step used on incompressible input — confirm.
pub(crate) const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
/// `hash_bits` of the test-only `ROW_CONFIG` preset.
pub(crate) const ROW_HASH_BITS: usize = 20;
/// `row_log` of the test-only `ROW_CONFIG` preset.
pub(crate) const ROW_LOG: usize = 5;
/// `search_depth` of the test-only `ROW_CONFIG` preset.
pub(crate) const ROW_SEARCH_DEPTH: usize = 16;
/// `target_len` of the test-only `ROW_CONFIG` preset.
pub(crate) const ROW_TARGET_LEN: usize = 48;
/// NOTE(review): presumably the width of the per-entry row tag — confirm.
pub(crate) const ROW_TAG_BITS: usize = 8;
/// NOTE(review): presumably the sentinel stored in unused row slots — confirm.
pub(crate) const ROW_EMPTY_SLOT: u32 = u32::MAX;
/// NOTE(review): presumably the number of bytes hashed per row key — confirm.
pub(crate) const ROW_HASH_KEY_LEN: usize = 4;
117#[cfg(test)]
124use super::match_table::storage::{HC_PRIME3BYTES, HC_PRIME4BYTES};
125
126#[cfg(test)]
131use super::match_table::storage::HC_EMPTY;
132use super::match_table::storage::HC3_HASH_LOG;
133#[cfg(test)]
135use super::match_table::storage::{HC_CHAIN_LOG, HC_HASH_LOG};
/// Default `search_depth` used by `HC_CONFIG` and `HC_OVERRIDE_DEFAULT`.
const HC_SEARCH_DEPTH: usize = 16;
141use super::hc::HC_MIN_MATCH_LEN;
/// Alias of `HC_FORMAT_MINMATCH` from the cost model.
/// NOTE(review): presumably the minimum match length of the optimal parser — confirm.
const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
/// Default `target_len` used by `HC_CONFIG` and `HC_OVERRIDE_DEFAULT`.
const HC_TARGET_LEN: usize = 48;
146
147use super::hc::MAX_HC_SEARCH_DEPTH;
149
/// Tuning parameters for the hash-chain / binary-tree match finder.
///
/// Stored in [`LevelParams::hc`] and adjusted by `apply_param_overrides`
/// and `adjust_params_for_source_size`.
#[derive(Clone, Copy, Eq, PartialEq)]
struct HcConfig {
    /// log2 of the hash table size; the workspace estimate charges `4 << hash_log` bytes.
    hash_log: usize,
    /// log2 of the chain table size; the workspace estimate charges `4 << chain_log` bytes.
    chain_log: usize,
    /// Search depth; a `search_log` override sets it to `1 << search_log`.
    search_depth: usize,
    /// Target match length; a `target_length` override replaces it verbatim.
    target_len: usize,
    /// Minimum match length used while searching; a `min_match` override is clamped to `4..=6`.
    search_mls: usize,
}
171
/// Tuning parameters for the row-hash match finder.
///
/// Stored in [`LevelParams::row`]; see the `ROW_L*` presets for per-level values.
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct RowConfig {
    /// log2 of the table size; the workspace estimate charges `(4 + 2) << hash_bits` bytes.
    pub(crate) hash_bits: usize,
    /// log2 of the row width; a `search_log` override clamps it to `4..=6`.
    pub(crate) row_log: usize,
    /// Search depth; a `search_log` override sets it to `1 << min(search_log, row_log)`.
    pub(crate) search_depth: usize,
    /// Target match length; a `target_length` override replaces it verbatim.
    pub(crate) target_len: usize,
    /// Minimum match length; a `min_match` override replaces it verbatim.
    pub(crate) mls: usize,
}
186
/// Test-only hash-chain preset built from the module-level `HC_*` defaults.
#[cfg(test)]
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
    search_mls: 4,
};
197
/// Baseline `HcConfig` that `apply_param_overrides` installs when a strategy
/// override selects a hash-chain / binary-tree search but the level row has
/// no `hc` configuration of its own.
const HC_OVERRIDE_DEFAULT: HcConfig = HcConfig {
    hash_log: super::match_table::storage::HC_HASH_LOG,
    chain_log: super::match_table::storage::HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
    search_mls: 4,
};
210
/// `hc` preset of the level-21 row of `LEVEL_TABLE`.
const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 24,
    chain_log: 24,
    search_depth: 512,
    target_len: 256,
    search_mls: 4,
};

/// `hc` preset for level 22 when the source size is unknown or above 256 KiB.
const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
    hash_log: 25,
    chain_log: 27,
    search_depth: 512,
    target_len: 999,
    search_mls: 4,
};

/// Level-22 `hc` preset chosen for sources of at most 256 KiB (and above 128 KiB).
const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
    hash_log: 19,
    chain_log: 19,
    search_depth: 1 << 13,
    target_len: 999,
    search_mls: 4,
};

/// Level-22 `hc` preset chosen for sources of at most 128 KiB (and above 16 KiB).
const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
    hash_log: 17,
    chain_log: 18,
    search_depth: 1 << 11,
    target_len: 999,
    search_mls: 4,
};

/// Level-22 `hc` preset chosen for sources of at most 16 KiB.
const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
    hash_log: 15,
    chain_log: 15,
    search_depth: 1 << 10,
    target_len: 999,
    search_mls: 4,
};
250
/// Test-only row preset built from the module-level `ROW_*` defaults.
#[cfg(test)]
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
    mls: ROW_MIN_MATCH_LEN,
};
261
// Row-matcher presets referenced by the level 5..=12 rows of `LEVEL_TABLE`.
// `ROW_L5` is also the fallback `apply_param_overrides` installs when a
// strategy override selects the row backend on a level without a row preset.

/// Row preset for level 5.
const ROW_L5: RowConfig = RowConfig {
    hash_bits: 19,
    row_log: 4,
    search_depth: 8,
    target_len: 2,
    mls: ROW_MIN_MATCH_LEN,
};

/// Row preset for level 6.
const ROW_L6: RowConfig = RowConfig {
    hash_bits: 19,
    row_log: 4,
    search_depth: 8,
    target_len: 4,
    mls: ROW_MIN_MATCH_LEN,
};
/// Row preset for level 7.
const ROW_L7: RowConfig = RowConfig {
    hash_bits: 20,
    row_log: 4,
    search_depth: 16,
    target_len: 8,
    mls: ROW_MIN_MATCH_LEN,
};
/// Row preset for level 8.
const ROW_L8: RowConfig = RowConfig {
    hash_bits: 20,
    row_log: 4,
    search_depth: 16,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
/// Row preset for level 9.
const ROW_L9: RowConfig = RowConfig {
    hash_bits: 21,
    row_log: 4,
    search_depth: 16,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
/// Row preset for level 10.
const ROW_L10: RowConfig = RowConfig {
    hash_bits: 22,
    row_log: 5,
    search_depth: 32,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
/// Row preset for level 11.
const ROW_L11: RowConfig = RowConfig {
    hash_bits: 22,
    row_log: 6,
    search_depth: 64,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
/// Row preset for level 12.
const ROW_L12: RowConfig = RowConfig {
    hash_bits: 23,
    row_log: 6,
    search_depth: 64,
    target_len: 32,
    mls: ROW_MIN_MATCH_LEN,
};
348
/// Table sizes for the double-fast matcher, stored in [`LevelParams::dfast`].
#[derive(Clone, Copy, Eq, PartialEq)]
struct DfastConfig {
    /// log2 of the long hash table; overridden by the `hash_log` parameter.
    long_hash_log: u8,
    /// log2 of the short hash table; overridden by the `chain_log` parameter.
    short_hash_log: u8,
}
361
/// Double-fast preset for level 3; also the fallback installed by
/// `apply_param_overrides` when the dfast backend is selected without a preset.
const DFAST_L3: DfastConfig = DfastConfig {
    long_hash_log: 17,
    short_hash_log: 16,
};
/// Double-fast preset for level 4.
const DFAST_L4: DfastConfig = DfastConfig {
    long_hash_log: 18,
    short_hash_log: 18,
};
372
/// Parameters of the single-table fast matcher, stored in [`LevelParams::fast`].
#[derive(Clone, Copy, Eq, PartialEq)]
struct FastConfig {
    /// log2 of the hash table size; the workspace estimate charges `4 << hash_log` bytes.
    hash_log: u32,
    /// Minimum match length; a `min_match` override replaces it verbatim.
    mls: u32,
    /// Step size; negative levels set it to `-level + 1`.
    step_size: usize,
}
383
/// Fast-matcher preset for level 1; also the fallback installed by
/// `apply_param_overrides` when the fast backend is selected without a preset.
const FAST_L1: FastConfig = FastConfig {
    hash_log: 14,
    mls: 7,
    step_size: 2,
};
/// Fast-matcher preset for level 2.
const FAST_L2: FastConfig = FastConfig {
    hash_log: 16,
    mls: 6,
    step_size: 2,
};
394
395#[derive(Copy, Clone, PartialEq, Eq)]
401struct LevelParams {
402 strategy_tag: super::strategy::StrategyTag,
403 search: super::strategy::SearchMethod,
409 window_log: u8,
410 lazy_depth: u8,
411 fast: Option<FastConfig>,
418 dfast: Option<DfastConfig>,
419 hc: Option<HcConfig>,
420 row: Option<RowConfig>,
421}
422
423impl LevelParams {
424 fn backend(&self) -> super::strategy::BackendTag {
429 self.search.backend()
430 }
431
432 fn parse(&self) -> super::strategy::ParseMode {
438 match self.search {
439 super::strategy::SearchMethod::BinaryTree => super::strategy::ParseMode::Optimal,
440 _ => super::strategy::ParseMode::from_lazy_depth(self.lazy_depth),
441 }
442 }
443
444 fn pre_split(&self) -> Option<u8> {
457 match self.strategy_tag {
458 super::strategy::StrategyTag::Fast => Some(0),
459 super::strategy::StrategyTag::Dfast => Some(1),
460 super::strategy::StrategyTag::Greedy => Some(2),
461 super::strategy::StrategyTag::Lazy => {
479 if self.lazy_depth >= 2 {
480 Some(4)
481 } else {
482 Some(2)
483 }
484 }
485 super::strategy::StrategyTag::Btlazy2 => Some(4),
486 super::strategy::StrategyTag::BtOpt
487 | super::strategy::StrategyTag::BtUltra
488 | super::strategy::StrategyTag::BtUltra2 => Some(4),
489 }
490 }
491}
492
493fn apply_param_overrides(params: &mut LevelParams, ov: &super::parameters::ParamOverrides) {
501 use super::strategy::SearchMethod;
502
503 if let Some(strategy) = ov.strategy {
505 let tag = strategy.tag();
506 params.strategy_tag = tag;
507 params.search = tag.search();
508 params.lazy_depth = strategy.lazy_depth();
509 }
510
511 match params.search {
514 SearchMethod::Fast => {
515 params.fast.get_or_insert(FAST_L1);
516 }
517 SearchMethod::DoubleFast => {
518 params.dfast.get_or_insert(DFAST_L3);
519 }
520 SearchMethod::RowHash => {
521 params.row.get_or_insert(ROW_L5);
522 }
523 SearchMethod::HashChain | SearchMethod::BinaryTree => {
524 params.hc.get_or_insert(HcConfig {
529 search_mls: if matches!(params.strategy_tag, super::strategy::StrategyTag::Btlazy2)
530 {
531 5
532 } else {
533 HC_OVERRIDE_DEFAULT.search_mls
534 },
535 ..HC_OVERRIDE_DEFAULT
536 });
537 }
538 }
539
540 if let Some(window_log) = ov.window_log {
542 params.window_log = window_log;
543 }
544
545 match params.search {
549 SearchMethod::Fast => {
550 if let Some(fast) = params.fast.as_mut() {
551 if let Some(hash_log) = ov.hash_log {
552 fast.hash_log = hash_log;
553 }
554 if let Some(min_match) = ov.min_match {
555 fast.mls = min_match;
556 }
557 }
558 }
559 SearchMethod::DoubleFast => {
560 if let Some(dfast) = params.dfast.as_mut() {
561 if let Some(hash_log) = ov.hash_log {
565 dfast.long_hash_log = hash_log as u8;
566 }
567 if let Some(chain_log) = ov.chain_log {
568 dfast.short_hash_log = chain_log as u8;
569 }
570 }
571 }
572 SearchMethod::RowHash => {
573 if let Some(row) = params.row.as_mut() {
574 if let Some(hash_log) = ov.hash_log {
579 row.hash_bits = hash_log as usize;
580 }
581 if let Some(search_log) = ov.search_log {
582 let row_log = (search_log as usize).clamp(4, 6);
585 row.row_log = row_log;
586 row.search_depth = 1usize << (search_log as usize).min(row_log);
587 }
588 if let Some(target_length) = ov.target_length {
589 row.target_len = target_length as usize;
590 }
591 if let Some(min_match) = ov.min_match {
592 row.mls = min_match as usize;
593 }
594 }
595 }
596 SearchMethod::HashChain | SearchMethod::BinaryTree => {
597 if let Some(hc) = params.hc.as_mut() {
598 if let Some(hash_log) = ov.hash_log {
599 hc.hash_log = hash_log as usize;
600 }
601 if let Some(chain_log) = ov.chain_log {
602 hc.chain_log = chain_log as usize;
603 }
604 if let Some(search_log) = ov.search_log {
605 hc.search_depth = 1usize << search_log;
606 }
607 if let Some(target_length) = ov.target_length {
608 hc.target_len = target_length as usize;
609 }
610 if let Some(min_match) = ov.min_match {
611 hc.search_mls = (min_match as usize).clamp(4, 6);
616 }
617 }
618 }
619 }
620}
621
/// Maps a strategy tag (plus lazy depth) to its numeric ordinal `1..=9`.
///
/// `Lazy` splits into two ordinals: `4` for `lazy_depth < 2` and `5` otherwise.
#[cfg(feature = "hash")]
fn ldm_strategy_ordinal(tag: super::strategy::StrategyTag, lazy_depth: u8) -> u32 {
    use super::strategy::StrategyTag as Tag;

    let deep_lazy = lazy_depth >= 2;
    match (tag, deep_lazy) {
        (Tag::Fast, _) => 1,
        (Tag::Dfast, _) => 2,
        (Tag::Greedy, _) => 3,
        (Tag::Lazy, false) => 4,
        (Tag::Lazy, true) => 5,
        (Tag::Btlazy2, _) => 6,
        (Tag::BtOpt, _) => 7,
        (Tag::BtUltra, _) => 8,
        (Tag::BtUltra2, _) => 9,
    }
}
646
647pub(crate) fn source_size_ceil_log(size: u64) -> u8 {
657 if size == 0 {
658 MIN_WINDOW_LOG
659 } else {
660 (64 - (size - 1).leading_zeros()) as u8
661 }
662}
663
// Per-backend dictionary "attach" cutoffs, expressed as log2 values.
// NOTE(review): the consumers are outside this part of the file; presumably
// sources smaller than `1 << cutoff` attach the dictionary tables instead of
// copying them — confirm at the use sites.

/// Cutoff log for the fast backend.
const FAST_ATTACH_DICT_CUTOFF_LOG: u8 = 13;

/// Cutoff log for the double-fast backend.
const DFAST_ATTACH_DICT_CUTOFF_LOG: u8 = 14;

/// Cutoff log for the row backend.
const ROW_ATTACH_DICT_CUTOFF_LOG: u8 = 15;
687
688fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
692 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
693 window_log.max(MIN_WINDOW_LOG as usize)
694}
695
696fn row_hash_bits_for_window(max_window_size: usize) -> usize {
697 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
706 (window_log + 1).max(MIN_WINDOW_LOG as usize)
707}
708
709fn hc_hash_bits_for_window(max_window_size: usize) -> usize {
714 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
715 window_log.max(MIN_WINDOW_LOG as usize)
716}
717
/// Parameter rows for numeric levels 1..=22; the row for level `n` lives at
/// index `n - 1`. `resolve_level_params` indexes this table for every level
/// except 22, which is resolved by `level22_btultra2_params_for_source_size`.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Levels 1-2: fast matcher.
    LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, search: super::strategy::SearchMethod::Fast, window_log: 19, lazy_depth: 0, fast: Some(FAST_L1), dfast: None, hc: None, row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, search: super::strategy::SearchMethod::Fast, window_log: 20, lazy_depth: 0, fast: Some(FAST_L2), dfast: None, hc: None, row: None },
    // Levels 3-4: double-fast matcher.
    LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, search: super::strategy::SearchMethod::DoubleFast, window_log: 21, lazy_depth: 1, fast: None, dfast: Some(DFAST_L3), hc: None, row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, search: super::strategy::SearchMethod::DoubleFast, window_log: 21, lazy_depth: 1, fast: None, dfast: Some(DFAST_L4), hc: None, row: None },
    // Level 5: greedy parse on the row matcher.
    LevelParams { strategy_tag: super::strategy::StrategyTag::Greedy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 0, fast: None, dfast: None, hc: None, row: Some(ROW_L5) },
    // Levels 6-12: lazy parse on the row matcher.
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 1, fast: None, dfast: None, hc: None, row: Some(ROW_L6) },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 1, fast: None, dfast: None, hc: None, row: Some(ROW_L7) },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L8) },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L9) },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L10) },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L11) },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L12) },
    // Levels 13-15: btlazy2 on the binary-tree search.
    LevelParams { strategy_tag: super::strategy::StrategyTag::Btlazy2, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 22, search_depth: 16, target_len: 32, search_mls: 5 }), row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Btlazy2, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 32, search_mls: 5 }), row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::Btlazy2, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 23, chain_log: 23, search_depth: 64, target_len: 32, search_mls: 5 }), row: None },
    // Levels 16-17: btopt.
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 48, search_mls: 5 }), row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, search: super::strategy::SearchMethod::BinaryTree, window_log: 23, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 23, search_depth: 32, target_len: 64, search_mls: 4 }), row: None },
    // Level 18: btultra.
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, search: super::strategy::SearchMethod::BinaryTree, window_log: 23, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 23, search_depth: 64, target_len: 64, search_mls: 4 }), row: None },
    // Levels 19-22: btultra2.
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 23, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 24, search_depth: 128, target_len: 256, search_mls: 4 }), row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 25, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 23, chain_log: 25, search_depth: 128, target_len: 256, search_mls: 4 }), row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 26, lazy_depth: 2, fast: None, dfast: None, hc: Some(BTULTRA2_HC_CONFIG), row: None },
    LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 27, lazy_depth: 2, fast: None, dfast: None, hc: Some(BTULTRA2_HC_CONFIG_L22), row: None },
];
778
/// Source size (in bytes) that `cdict_table_logs` assumes when sizing
/// dictionary tables.
const DICT_MIN_SRC_SIZE: u64 = 513;
784
785fn dict_and_window_log(window_log: u8, src_size: u64, dict_size: u64) -> u32 {
791 if dict_size == 0 {
792 return window_log as u32;
793 }
794 let window_size: u64 = 1u64 << window_log;
795 let dict_and_window = dict_size + window_size;
799 if window_size >= dict_size + src_size {
800 window_log as u32
802 } else {
803 source_size_ceil_log(dict_and_window) as u32
805 }
806}
807
808fn cdict_table_logs(
817 window_log: u8,
818 hash_log: usize,
819 chain_log: usize,
820 uses_bt: bool,
821 dict_size: usize,
822) -> (usize, usize) {
823 let dict_size = dict_size as u64;
824 let src_size = DICT_MIN_SRC_SIZE;
826 let tsize = src_size + dict_size;
830 let resized_window_log = (window_log as u32)
831 .min(source_size_ceil_log(tsize) as u32)
832 .max(1);
833 let daw = dict_and_window_log(resized_window_log as u8, src_size, dict_size);
834 let cycle_log = (chain_log as u32).saturating_sub(uses_bt as u32);
836 let new_hash_log = if hash_log as u32 > daw + 1 {
837 (daw + 1) as usize
838 } else {
839 hash_log
840 };
841 let new_chain_log = if cycle_log > daw {
842 chain_log.saturating_sub((cycle_log - daw) as usize)
843 } else {
844 chain_log
845 };
846 (new_hash_log, new_chain_log)
847}
848
/// Smallest window/table log used by this module; also the value
/// `source_size_ceil_log` returns for a size of zero.
pub(crate) const MIN_WINDOW_LOG: u8 = 10;
/// Lower bound on the window log that `adjust_params_for_source_size` may
/// shrink to when a source-size hint is present.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
857
858fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
868 let raw_src_log = source_size_ceil_log(src_size);
878 let src_log = raw_src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
879 if src_log < params.window_log {
880 params.window_log = src_log;
881 }
882 let table_log = raw_src_log.max(MIN_WINDOW_LOG);
893 let backend = params.backend();
894 if backend == super::strategy::BackendTag::HashChain {
895 let hc = params
896 .hc
897 .as_mut()
898 .expect("HashChain level row carries an HcConfig");
899 if (table_log + 2) < hc.hash_log as u8 {
900 hc.hash_log = (table_log + 2) as usize;
901 }
902 if (table_log + 1) < hc.chain_log as u8 {
903 hc.chain_log = (table_log + 1) as usize;
904 }
905 } else if backend == super::strategy::BackendTag::Row {
906 let row = params
907 .row
908 .as_mut()
909 .expect("Row level row carries a RowConfig");
910 let row_cap = (table_log + 1) as usize;
918 if row_cap < row.hash_bits {
919 row.hash_bits = row_cap;
920 }
921 } else if backend == super::strategy::BackendTag::Simple {
922 let fast = params
923 .fast
924 .as_mut()
925 .expect("Fast level row carries a FastConfig");
926 let fast_cap = (table_log + 1) as u32;
927 if fast_cap < fast.hash_log {
928 fast.hash_log = fast_cap;
929 }
930 }
931 params
932}
933
934fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
935 let mut hc = match source_size {
936 Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
937 Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
938 Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
939 _ => BTULTRA2_HC_CONFIG_L22,
940 };
941 let mut window_log = match source_size {
942 Some(size) if size <= 16 * 1024 => 14,
943 Some(size) if size <= 128 * 1024 => 17,
944 Some(size) if size <= 256 * 1024 => 18,
945 _ => 27,
946 };
947 if let Some(size) = source_size
948 && size > 256 * 1024
949 {
950 let src_log = source_size_ceil_log(size);
951 window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
952 let adjusted_table_log = window_log as usize + 1;
953 hc.hash_log = hc.hash_log.min(adjusted_table_log);
954 hc.chain_log = hc.chain_log.min(adjusted_table_log);
955 }
956 LevelParams {
957 strategy_tag: super::strategy::StrategyTag::BtUltra2,
958 search: super::strategy::SearchMethod::BinaryTree,
959 window_log,
960 lazy_depth: 2,
961 fast: None,
962 dfast: None,
963 hc: Some(hc),
964 row: None,
965 }
966}
967
968pub fn estimated_compression_workspace_bytes(level: CompressionLevel) -> usize {
974 use super::strategy::StrategyTag;
975 let params = resolve_level_params(level, None);
976 let window = 1usize << params.window_log;
977 let wants_hash3 = matches!(
982 params.strategy_tag,
983 StrategyTag::BtUltra | StrategyTag::BtUltra2
984 );
985 let uses_bt = matches!(
986 params.strategy_tag,
987 StrategyTag::Btlazy2 | StrategyTag::BtOpt | StrategyTag::BtUltra | StrategyTag::BtUltra2
988 );
989 let tables = params.fast.map(|f| 4usize << f.hash_log).unwrap_or(0)
990 + params
991 .dfast
992 .map(|d| (4usize << d.long_hash_log) + (4usize << d.short_hash_log))
993 .unwrap_or(0)
994 + params
995 .hc
996 .map(|h| {
997 let hash3 = if wants_hash3 {
998 4usize
999 << super::match_table::storage::HC3_HASH_LOG.min(params.window_log as usize)
1000 } else {
1001 0
1002 };
1003 (4usize << h.hash_log) + (4usize << h.chain_log) + hash3
1004 })
1005 .unwrap_or(0)
1006 + params
1007 .row
1008 .map(|r| (4usize << r.hash_bits) + (2usize << r.hash_bits))
1009 .unwrap_or(0);
1010 let bt = if uses_bt {
1013 super::bt::BtMatcher::estimated_workspace_bytes()
1014 } else {
1015 0
1016 };
1017 let staging = 3 * (128 * 1024);
1020 window + tables + bt + staging
1021}
1022
1023pub fn estimated_bt_strategy_extra_bytes(strategy_ordinal: u32, window_log: u32) -> usize {
1028 if !(6..=9).contains(&strategy_ordinal) {
1029 return 0;
1030 }
1031 let hash3 = if matches!(strategy_ordinal, 8 | 9) {
1032 4usize << super::match_table::storage::HC3_HASH_LOG.min(window_log as usize)
1033 } else {
1034 0
1035 };
1036 super::bt::BtMatcher::estimated_workspace_bytes() + hash3
1037}
1038
1039fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
1042 if matches!(level, CompressionLevel::Level(22)) {
1043 return level22_btultra2_params_for_source_size(source_size);
1044 }
1045 let params = match level {
1046 CompressionLevel::Uncompressed => LevelParams {
1047 strategy_tag: super::strategy::StrategyTag::Fast,
1048 search: super::strategy::SearchMethod::Fast,
1049 window_log: 17,
1053 lazy_depth: 0,
1054 fast: Some(FastConfig {
1058 hash_log: 14,
1059 mls: 6,
1060 step_size: 2,
1061 }),
1062 dfast: None,
1063 hc: None,
1064 row: None,
1065 },
1066 CompressionLevel::Fastest => {
1067 let mut p = LEVEL_TABLE[0];
1074 p.fast = Some(FastConfig {
1075 hash_log: 14,
1076 mls: 6,
1077 step_size: 2,
1078 });
1079 p
1080 }
1081 CompressionLevel::Default => LEVEL_TABLE[2],
1082 CompressionLevel::Better => LEVEL_TABLE[6],
1083 CompressionLevel::Best => LEVEL_TABLE[12],
1089 CompressionLevel::Level(n) => {
1090 if n > 0 {
1091 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
1092 LEVEL_TABLE[idx]
1093 } else if n == 0 {
1094 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
1096 } else {
1097 let clamped = n.max(CompressionLevel::MIN_LEVEL);
1107 let target_length = (-clamped) as usize;
1108 let step_size = target_length + 1;
1109 LevelParams {
1118 strategy_tag: super::strategy::StrategyTag::Fast,
1119 search: super::strategy::SearchMethod::Fast,
1120 window_log: 19,
1121 lazy_depth: 0,
1122 fast: Some(FastConfig {
1123 hash_log: 13,
1124 mls: 7,
1125 step_size,
1126 }),
1127 dfast: None,
1128 hc: None,
1129 row: None,
1130 }
1131 }
1132 }
1133 };
1134 if let Some(size) = source_size {
1135 adjust_params_for_source_size(params, size)
1136 } else {
1137 params
1138 }
1139}
1140
1141pub(crate) fn level_pre_split(level: CompressionLevel) -> Option<usize> {
1147 if matches!(level, CompressionLevel::Uncompressed) {
1153 return None;
1154 }
1155 resolve_level_params(level, None)
1156 .pre_split()
1157 .map(usize::from)
1158}
1159
/// The concrete matcher currently owned by a `MatchGeneratorDriver`; exactly
/// one backend is alive at a time.
#[derive(Clone)]
enum MatcherStorage {
    /// Fast single-table matcher (`BackendTag::Simple`).
    Simple(FastKernelMatcher),
    /// Double-fast matcher (`BackendTag::Dfast`).
    Dfast(DfastMatchGenerator),
    /// Row-hash matcher (`BackendTag::Row`).
    Row(RowMatchGenerator),
    /// Hash-chain / binary-tree matcher (`BackendTag::HashChain`).
    HashChain(HcMatchGenerator),
}
1208
1209impl MatcherStorage {
1210 fn heap_size(&self) -> usize {
1212 match self {
1213 Self::Simple(m) => m.heap_size(),
1214 Self::Dfast(m) => m.heap_size(),
1215 Self::Row(m) => m.heap_size(),
1216 Self::HashChain(m) => m.heap_size(),
1217 }
1218 }
1219
1220 fn backend(&self) -> super::strategy::BackendTag {
1222 use super::strategy::BackendTag;
1223 match self {
1224 Self::Simple(_) => BackendTag::Simple,
1225 Self::Dfast(_) => BackendTag::Dfast,
1226 Self::Row(_) => BackendTag::Row,
1227 Self::HashChain(_) => BackendTag::HashChain,
1228 }
1229 }
1230}
1231
/// Owns the active matcher backend and the bookkeeping needed to drive it.
///
/// NOTE(review): several fields are only written outside this part of the
/// file; their notes below are limited to what the visible code shows.
pub struct MatchGeneratorDriver {
    /// Pool of byte buffers; `recycle_simple_space` pushes buffers reclaimed
    /// from the simple matcher here.
    vec_pool: Vec<Vec<u8>>,
    /// The active matcher backend.
    storage: MatcherStorage,
    /// Active strategy tag (`Fast` after `new`).
    strategy_tag: super::strategy::StrategyTag,
    /// Active search method (`Fast` after `new`).
    search: super::strategy::SearchMethod,
    /// Active parse mode (`Greedy` after `new`).
    parse: super::strategy::ParseMode,
    /// Test-only override of the search method and parse mode.
    #[cfg(test)]
    config_override: Option<(super::strategy::SearchMethod, super::strategy::ParseMode)>,
    /// Parameter overrides installed via `set_param_overrides`.
    param_overrides: Option<super::parameters::ParamOverrides>,
    /// Current slice size; initialised from the `slice_size` constructor argument.
    slice_size: usize,
    /// Slice size passed to the constructor.
    base_slice_size: usize,
    /// Window size reported to callers; initialised to the requested window
    /// rounded up to a power of two.
    reported_window_size: usize,
    /// Bytes of window budget still attributed to a dictionary; reduced by
    /// `retire_dictionary_budget`.
    dictionary_retained_budget: usize,
    /// Optional source-size hint (`None` after `new`).
    source_size_hint: Option<u64>,
    /// Optional dictionary-size hint (`None` after `new`).
    dictionary_size_hint: Option<usize>,
    /// NOTE(review): presumably the size log used by the last reset — confirm.
    reset_size_log: Option<u8>,
    /// NOTE(review): presumably the configuration shape of the last reset,
    /// used to detect whether tables can be reused — confirm.
    reset_shape: Option<(
        LevelParams,
        usize,
        bool,
        Option<super::parameters::LdmOverride>,
    )>,
    /// `(start, end)` of the borrowed block staged by `set_borrowed_block`;
    /// cleared by `clear_borrowed_window`.
    borrowed_pending: Option<(usize, usize)>,
    /// NOTE(review): presumably a cached, dictionary-primed matcher together
    /// with a size and the key it was built for — confirm.
    primed: Option<(MatcherStorage, usize, PrimedKey)>,
}
1333
1334#[derive(Clone, Copy, PartialEq, Eq)]
1375struct PrimedKey {
1376 level: super::CompressionLevel,
1377 params: LevelParams,
1378 table_bits: usize,
1379 fast_attach: bool,
1380 ldm: Option<super::parameters::LdmOverride>,
1389}
1390
1391impl MatchGeneratorDriver {
1392 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
1397 assert!(
1414 slice_size > 0,
1415 "MatchGeneratorDriver::new requires slice_size > 0 (got 0)",
1416 );
1417 assert!(
1418 max_slices_in_window > 0,
1419 "MatchGeneratorDriver::new requires max_slices_in_window > 0 (got 0)",
1420 );
1421 let max_window_size = max_slices_in_window
1422 .checked_mul(slice_size)
1423 .expect("MatchGeneratorDriver::new: slice_size * max_slices_in_window overflows usize");
1424 let next_pow2 = max_window_size.checked_next_power_of_two().expect(
1439 "MatchGeneratorDriver::new: max_window_size too large for \
1440 next_power_of_two without overflow",
1441 );
1442 let window_log_init = next_pow2.trailing_zeros() as u8;
1443 Self {
1444 vec_pool: Vec::new(),
1445 storage: MatcherStorage::Simple(FastKernelMatcher::with_params(
1446 window_log_init,
1447 FAST_LEVEL_1_HASH_LOG,
1448 FAST_LEVEL_1_MLS,
1449 2, )),
1451 strategy_tag: super::strategy::StrategyTag::Fast,
1452 search: super::strategy::SearchMethod::Fast,
1453 parse: super::strategy::ParseMode::Greedy,
1454 #[cfg(test)]
1455 config_override: None,
1456 param_overrides: None,
1457 slice_size,
1458 base_slice_size: slice_size,
1459 reported_window_size: next_pow2,
1468 reset_size_log: None,
1469 reset_shape: None,
1470 dictionary_retained_budget: 0,
1471 source_size_hint: None,
1472 dictionary_size_hint: None,
1473 borrowed_pending: None,
1474 primed: None,
1475 }
1476 }
1477
    /// Resolves the parameter row for `level`, shrunk to `source_size` when
    /// one is given. Thin wrapper around `resolve_level_params`.
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }
1481
    /// Stores (or clears, with `None`) the parameter overrides. This only
    /// records the value; nothing is applied to the active matcher here.
    pub(crate) fn set_param_overrides(
        &mut self,
        overrides: Option<super::parameters::ParamOverrides>,
    ) {
        self.param_overrides = overrides;
    }
1491
    /// Returns the tag of the backend currently held in `storage`.
    pub(crate) fn active_backend(&self) -> super::strategy::BackendTag {
        self.storage.backend()
    }
1497
1498 pub(crate) fn borrowed_supported(&self) -> bool {
1505 use super::strategy::{BackendTag, SearchMethod, StrategyTag};
1506 match self.active_backend() {
1507 BackendTag::Simple | BackendTag::Dfast | BackendTag::Row => true,
1508 BackendTag::HashChain => match self.search {
1520 SearchMethod::HashChain => true,
1521 SearchMethod::BinaryTree => matches!(self.strategy_tag, StrategyTag::Btlazy2),
1522 _ => false,
1523 },
1524 }
1525 }
1526
1527 fn simple_mut(&mut self) -> &mut FastKernelMatcher {
1528 match &mut self.storage {
1529 MatcherStorage::Simple(m) => m,
1530 _ => panic!("simple backend must be initialized by reset() before use"),
1531 }
1532 }
1533
1534 fn recycle_simple_space(&mut self) {
1548 if let Some(space) = self.simple_mut().take_recycled_space() {
1549 self.vec_pool.push(space);
1561 }
1562 }
1563
1564 pub(crate) unsafe fn set_borrowed_window(&mut self, buffer: &[u8]) {
1574 match self.active_backend() {
1576 super::strategy::BackendTag::Simple => unsafe {
1577 self.simple_mut().set_borrowed_window(buffer)
1578 },
1579 super::strategy::BackendTag::Dfast => unsafe {
1580 self.dfast_matcher_mut().set_borrowed_window(buffer)
1581 },
1582 super::strategy::BackendTag::Row => unsafe {
1583 self.row_matcher_mut().set_borrowed_window(buffer)
1584 },
1585 super::strategy::BackendTag::HashChain => unsafe {
1586 self.hc_matcher_mut().set_borrowed_window(buffer)
1587 },
1588 }
1589 }
1590
1591 pub(crate) fn clear_borrowed_window(&mut self) {
1594 match self.active_backend() {
1595 super::strategy::BackendTag::Simple => self.simple_mut().clear_borrowed_window(),
1596 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().clear_borrowed_window(),
1597 super::strategy::BackendTag::Row => self.row_matcher_mut().clear_borrowed_window(),
1598 super::strategy::BackendTag::HashChain => self.hc_matcher_mut().clear_borrowed_window(),
1599 #[allow(unreachable_patterns)]
1600 _ => {}
1601 }
1602 self.borrowed_pending = None;
1603 }
1604
1605 pub(crate) fn set_borrowed_block(&mut self, block_start: usize, block_end: usize) {
1613 assert!(
1614 self.borrowed_supported(),
1615 "borrowed block staging is not supported for the active backend/search config",
1616 );
1617 assert!(
1618 block_start <= block_end,
1619 "borrowed block range must satisfy start <= end (start={block_start} end={block_end})",
1620 );
1621 self.borrowed_pending = Some((block_start, block_end));
1622 match self.active_backend() {
1628 super::strategy::BackendTag::Simple => self
1629 .simple_mut()
1630 .stage_borrowed_block(block_start, block_end),
1631 super::strategy::BackendTag::Dfast => self
1632 .dfast_matcher_mut()
1633 .stage_borrowed_block(block_start, block_end),
1634 super::strategy::BackendTag::Row => self
1635 .row_matcher_mut()
1636 .stage_borrowed_block(block_start, block_end),
1637 super::strategy::BackendTag::HashChain => self
1638 .hc_matcher_mut()
1639 .table
1640 .stage_borrowed_block(block_start, block_end),
1641 }
1642 }
1643
1644 #[cfg(test)]
1645 fn dfast_matcher(&self) -> &DfastMatchGenerator {
1646 match &self.storage {
1647 MatcherStorage::Dfast(m) => m,
1648 _ => panic!("dfast backend must be initialized by reset() before use"),
1649 }
1650 }
1651
1652 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
1653 match &mut self.storage {
1654 MatcherStorage::Dfast(m) => m,
1655 _ => panic!("dfast backend must be initialized by reset() before use"),
1656 }
1657 }
1658
/// Test-only shared access to the row backend.
///
/// # Panics
///
/// Panics when the storage currently holds a different backend.
#[cfg(test)]
fn row_matcher(&self) -> &RowMatchGenerator {
    let MatcherStorage::Row(matcher) = &self.storage else {
        panic!("row backend must be initialized by reset() before use");
    };
    matcher
}
1666
1667 fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
1668 match &mut self.storage {
1669 MatcherStorage::Row(m) => m,
1670 _ => panic!("row backend must be initialized by reset() before use"),
1671 }
1672 }
1673
/// Test-only shared access to the hash-chain backend.
///
/// # Panics
///
/// Panics when the storage currently holds a different backend.
#[cfg(test)]
fn hc_matcher(&self) -> &HcMatchGenerator {
    let MatcherStorage::HashChain(matcher) = &self.storage else {
        panic!("hash chain backend must be initialized by reset() before use");
    };
    matcher
}
1681
1682 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
1683 match &mut self.storage {
1684 MatcherStorage::HashChain(m) => m,
1685 _ => panic!("hash chain backend must be initialized by reset() before use"),
1686 }
1687 }
1688
1689 #[must_use]
1698 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) -> bool {
1699 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
1700 if reclaimed == 0 {
1701 return false;
1702 }
1703 self.dictionary_retained_budget -= reclaimed;
1704 match self.active_backend() {
1705 super::strategy::BackendTag::Simple => {
1706 let matcher = self.simple_mut();
1707 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
1712 }
1713 super::strategy::BackendTag::Dfast => {
1714 let matcher = self.dfast_matcher_mut();
1715 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
1720 }
1721 super::strategy::BackendTag::Row => {
1722 let matcher = self.row_matcher_mut();
1723 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
1728 }
1729 super::strategy::BackendTag::HashChain => {
1730 let matcher = self.hc_matcher_mut();
1731 matcher.table.max_window_size =
1734 matcher.table.max_window_size.saturating_sub(reclaimed);
1735 }
1736 }
1737 true
1738 }
1739
1740 fn trim_after_budget_retire(&mut self) {
1741 loop {
1742 let mut evicted_bytes = 0usize;
1743 match self.active_backend() {
1744 super::strategy::BackendTag::Simple => {
1745 let MatcherStorage::Simple(m) = &mut self.storage else {
1754 unreachable!("active_backend() == Simple proven above");
1755 };
1756 evicted_bytes += m.trim_to_window();
1757 }
1758 super::strategy::BackendTag::Dfast => {
1759 let dfast = self.dfast_matcher_mut();
1768 let pre = dfast.window_size;
1769 dfast.trim_to_window();
1770 evicted_bytes += pre - dfast.window_size;
1771 }
1772 super::strategy::BackendTag::Row => {
1773 let row = self.row_matcher_mut();
1778 let pre = row.window_size;
1779 row.trim_to_window();
1780 evicted_bytes += pre - row.window_size;
1781 }
1782 super::strategy::BackendTag::HashChain => {
1783 let table = &mut self.hc_matcher_mut().table;
1788 let pre = table.window_size;
1789 table.trim_to_window();
1790 evicted_bytes += pre - table.window_size;
1791 }
1792 }
1793 if evicted_bytes == 0 {
1794 break;
1795 }
1796 let _ = self.retire_dictionary_budget(evicted_bytes);
1810 }
1811 }
1812
1813 fn skip_matching_for_dictionary_priming(&mut self) {
1814 match self.active_backend() {
1815 super::strategy::BackendTag::Simple => {
1816 let attach = self
1828 .reset_size_log
1829 .is_none_or(|log| log <= FAST_ATTACH_DICT_CUTOFF_LOG);
1830 if attach {
1831 self.simple_mut().skip_matching_for_dict_prime();
1832 } else {
1833 self.simple_mut().skip_matching_with_hint(Some(false));
1834 }
1835 self.recycle_simple_space();
1836 }
1837 super::strategy::BackendTag::Dfast => {
1838 let attach = self
1849 .reset_size_log
1850 .is_none_or(|log| log <= DFAST_ATTACH_DICT_CUTOFF_LOG);
1851 if attach {
1852 self.dfast_matcher_mut().skip_matching_for_dict_attach();
1853 } else {
1854 self.dfast_matcher_mut().invalidate_dict_cache();
1855 self.dfast_matcher_mut().skip_matching_dense();
1856 }
1857 }
1858 super::strategy::BackendTag::Row => {
1859 let attach = self
1866 .reset_size_log
1867 .is_none_or(|log| log <= ROW_ATTACH_DICT_CUTOFF_LOG);
1868 if attach {
1869 self.row_matcher_mut().prime_dict_attach_current_block();
1870 } else {
1871 self.row_matcher_mut().invalidate_dict_cache();
1872 self.row_matcher_mut().skip_matching_with_hint(Some(false));
1873 }
1874 }
1875 super::strategy::BackendTag::HashChain => {
1876 let table = &mut self.hc_matcher_mut().table;
1877 if table.uses_bt {
1878 table.skip_matching_dict_bt();
1883 } else {
1884 self.hc_matcher_mut().skip_matching(Some(false));
1885 }
1886 }
1887 }
1888 }
1889}
1890
1891impl Matcher for MatchGeneratorDriver {
/// Always `true`: this driver implements `prime_with_dictionary`.
fn supports_dictionary_priming(&self) -> bool {
    true
}
1895
/// Stores `size` as the source-size hint. The hint is one-shot: the next
/// `reset` takes it out of `source_size_hint`.
fn set_source_size_hint(&mut self, size: u64) {
    self.source_size_hint = Some(size);
}
1899
/// Stores `size` as the dictionary-size hint. The hint is one-shot: the
/// next `reset` takes it out of `dictionary_size_hint`.
fn set_dictionary_size_hint(&mut self, size: usize) {
    self.dictionary_size_hint = Some(size);
}
1903
1904 fn heap_size(&self) -> usize {
1909 let pool: usize = self.vec_pool.capacity() * core::mem::size_of::<Vec<u8>>()
1910 + self.vec_pool.iter().map(Vec::capacity).sum::<usize>();
1911 let snapshot = self
1912 .primed
1913 .as_ref()
1914 .map_or(0, |(storage, _, _)| storage.heap_size());
1915 pool + self.storage.heap_size() + snapshot
1916 }
1917
/// Removes any stored parameter overrides, so later `reset` calls use the
/// level-derived parameters unmodified.
fn clear_param_overrides(&mut self) {
    self.param_overrides = None;
}
1921
/// Reconfigures the driver for a new frame at `level`.
///
/// Consumes the one-shot source/dictionary size hints, resolves the level
/// parameters (applying any parameter overrides), swaps the backend storage
/// when the resolved backend differs from the active one, configures and
/// resets the backend, and finally records the resolved shape in
/// `reset_shape`.
///
/// # Panics
///
/// Panics if the resolved parameters lack the config struct required by the
/// selected backend (see the `expect` calls below).
fn reset(&mut self, level: CompressionLevel) {
    // Both hints are taken, so they must be set again before the next reset.
    let hint = self.source_size_hint.take();
    let dict_hint = self.dictionary_size_hint.take();
    self.reset_size_log = hint.map(source_size_ceil_log);
    let hinted = hint.is_some();
    // NOTE(review): `params` is mutated below outside of test builds as well
    // (override application), so this `allow(unused_mut)` looks unnecessary.
    #[cfg_attr(not(test), allow(unused_mut))]
    let mut params = Self::level_params(level, hint);
    // Test-only: force a search/parse combination, filling in a default
    // backend config when the level row does not carry one.
    #[cfg(test)]
    if let Some((search, parse)) = self.config_override.take() {
        params.search = search;
        params.lazy_depth = parse.lazy_depth();
        use super::strategy::SearchMethod;
        match search {
            SearchMethod::Fast => {
                params.fast.get_or_insert(FAST_L1);
            }
            SearchMethod::DoubleFast => {
                params.dfast.get_or_insert(DFAST_L3);
            }
            SearchMethod::RowHash => {
                params.row.get_or_insert(ROW_CONFIG);
            }
            SearchMethod::HashChain | SearchMethod::BinaryTree => {
                params.hc.get_or_insert(HC_CONFIG);
            }
        }
    }
    // Apply non-empty overrides. With a source-size hint the parameters are
    // re-adjusted afterwards, and an explicit `window_log` override is then
    // written back so the adjustment cannot replace it.
    if let Some(ov) = self.param_overrides
        && !ov.is_empty()
    {
        apply_param_overrides(&mut params, &ov);
        if let Some(hint_size) = hint {
            params = adjust_params_for_source_size(params, hint_size);
            if let Some(window_log) = ov.window_log {
                params.window_log = window_log;
            }
        }
    }
    // With a non-zero dictionary hint, the hash-chain table logs are
    // recomputed by `cdict_table_logs` from the un-hinted level parameters
    // (plus overrides) and the dictionary size.
    if let Some(dict_size) = dict_hint.filter(|&size| size > 0) {
        let mut base_params = Self::level_params(level, None);
        if let Some(ov) = self.param_overrides
            && !ov.is_empty()
        {
            apply_param_overrides(&mut base_params, &ov);
        }
        if let (Some(hc), Some(base_hc)) = (params.hc.as_mut(), base_params.hc) {
            let uses_bt = matches!(
                params.strategy_tag,
                super::strategy::StrategyTag::Btlazy2
                    | super::strategy::StrategyTag::BtOpt
                    | super::strategy::StrategyTag::BtUltra
                    | super::strategy::StrategyTag::BtUltra2
            );
            let (dict_hash_log, dict_chain_log) = cdict_table_logs(
                params.window_log,
                base_hc.hash_log,
                base_hc.chain_log,
                uses_bt,
                dict_size,
            );
            hc.hash_log = dict_hash_log;
            hc.chain_log = dict_chain_log;
        }
    }
    let next_backend = params.backend();
    let max_window_size = 1usize << params.window_log;
    // Per-frame bookkeeping starts from a clean slate.
    self.dictionary_retained_budget = 0;
    self.borrowed_pending = None;
    // Backend switch: release the outgoing backend's tables, reset it, then
    // build fresh storage for the incoming backend.
    if self.active_backend() != next_backend {
        match &mut self.storage {
            MatcherStorage::Simple(_m) => {
                // Nothing is released for the simple backend here.
            }
            MatcherStorage::Dfast(m) => {
                m.short_hash = Vec::new();
                m.long_hash = Vec::new();
                m.reset();
            }
            MatcherStorage::Row(m) => {
                m.row_heads = Vec::new();
                m.row_positions = Vec::new();
                m.row_tags = Vec::new();
                m.reset();
            }
            MatcherStorage::HashChain(m) => {
                m.table.hash_table = Vec::new();
                m.table.chain_table = Vec::new();
                m.table.hash3_table = Vec::new();
                let vec_pool = &mut self.vec_pool;
                // Buffers handed back by the reset are grown to their full
                // capacity (zero-filled) and returned to the pool.
                m.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
        self.storage = match next_backend {
            super::strategy::BackendTag::Simple => {
                let fast = params.fast.expect("Fast level row carries a FastConfig");
                MatcherStorage::Simple(FastKernelMatcher::with_params(
                    params.window_log,
                    fast.hash_log,
                    fast.mls,
                    fast.step_size,
                ))
            }
            super::strategy::BackendTag::Dfast => {
                MatcherStorage::Dfast(DfastMatchGenerator::new(max_window_size))
            }
            super::strategy::BackendTag::Row => {
                MatcherStorage::Row(RowMatchGenerator::new(max_window_size))
            }
            super::strategy::BackendTag::HashChain => {
                MatcherStorage::HashChain(HcMatchGenerator::new(max_window_size))
            }
        };
    }

    self.strategy_tag = params.strategy_tag;
    self.search = params.search;
    self.parse = params.parse();
    // A slice never exceeds the window.
    self.slice_size = self.base_slice_size.min(max_window_size);
    self.reported_window_size = max_window_size;
    let strategy_tag = self.strategy_tag;
    // Window used for table sizing: with a hint, a power of two derived from
    // the source-size log (at least `MIN_WINDOW_LOG`, shift kept below the
    // bit width of `usize`), capped at `max_window_size`.
    let table_window_size = match hint {
        Some(h) => {
            let raw_log = source_size_ceil_log(h);
            let shift = raw_log.max(MIN_WINDOW_LOG).min(usize::BITS as u8 - 1);
            (1usize << shift).min(max_window_size)
        }
        None => max_window_size,
    };
    // Stays 0 for the Simple and HashChain backends; Dfast and Row record
    // their resolved hash bits here.
    let mut resolved_table_bits: usize = 0;
    match &mut self.storage {
        MatcherStorage::Simple(m) => {
            let fast = params.fast.expect("Fast level row carries a FastConfig");
            // A non-zero dictionary hint combined with a size log at or below
            // the fast cutoff (or no size log at all).
            let dict_attach_epoch = matches!(dict_hint, Some(size) if size > 0)
                && self
                    .reset_size_log
                    .is_none_or(|log| log <= FAST_ATTACH_DICT_CUTOFF_LOG);
            // True when a dictionary is hinted, attach mode is off, and the
            // held snapshot's key equals the key this reset will produce for
            // the simple backend in non-attach mode.
            let table_overwritten_by_restore = matches!(dict_hint, Some(size) if size > 0)
                && !dict_attach_epoch
                && self.primed.as_ref().is_some_and(|(_, _, captured)| {
                    *captured
                        == PrimedKey {
                            level,
                            params,
                            table_bits: 0,
                            fast_attach: false,
                            ldm: None,
                        }
                });
            m.reset(
                params.window_log,
                fast.hash_log,
                fast.mls,
                fast.step_size,
                dict_attach_epoch,
                table_overwritten_by_restore,
            );
        }
        MatcherStorage::Dfast(dfast) => {
            dfast.max_window_size = max_window_size;
            let dcfg = params
                .dfast
                .expect("Dfast level row must carry a DfastConfig");
            // With a hint, table bits are capped by the window-derived bits.
            let long_bits = if hinted {
                dfast_hash_bits_for_window(table_window_size).min(dcfg.long_hash_log as usize)
            } else {
                dcfg.long_hash_log as usize
            };
            let short_bits = if hinted {
                dfast_hash_bits_for_window(table_window_size).min(dcfg.short_hash_log as usize)
            } else {
                dcfg.short_hash_log as usize
            };
            resolved_table_bits = long_bits;
            dfast.set_hash_bits(long_bits, short_bits);
            dfast.reset();
        }
        MatcherStorage::Row(row) => {
            row.max_window_size = max_window_size;
            row.lazy_depth = params.lazy_depth;
            let mut row_cfg = params.row.expect("Row level row carries a RowConfig");
            if hinted {
                row_cfg.hash_bits = row_cfg
                    .hash_bits
                    .min(row_hash_bits_for_window(table_window_size));
            }
            row.configure(row_cfg);
            resolved_table_bits = row.hash_bits();
            row.reset();
        }
        MatcherStorage::HashChain(hc) => {
            hc.table.max_window_size = max_window_size;
            hc.hc.lazy_depth = params.lazy_depth;
            let mut hc_cfg = params.hc.expect("HashChain level row carries an HcConfig");
            if hinted {
                // Cap the table logs by the hinted window: hash log at
                // `wlog + 1`; chain log at `wlog + 1` for binary-tree
                // strategies and `wlog` otherwise.
                let wlog = hc_hash_bits_for_window(table_window_size);
                let uses_bt = matches!(
                    strategy_tag,
                    super::strategy::StrategyTag::Btlazy2
                        | super::strategy::StrategyTag::BtOpt
                        | super::strategy::StrategyTag::BtUltra
                        | super::strategy::StrategyTag::BtUltra2
                );
                hc_cfg.hash_log = hc_cfg.hash_log.min(wlog + 1);
                hc_cfg.chain_log = hc_cfg.chain_log.min(if uses_bt { wlog + 1 } else { wlog });
            }
            hc.configure(hc_cfg, strategy_tag, params.window_log);
            let vec_pool = &mut self.vec_pool;
            // Same buffer recycling as in the backend-switch path above.
            hc.reset(|mut data| {
                data.resize(data.capacity(), 0);
                vec_pool.push(data);
            });
            if let Some(src) = hint {
                // A hint that does not fit in `usize` saturates to `usize::MAX`.
                let src_hint = usize::try_from(src).unwrap_or(usize::MAX);
                let expected = src_hint.saturating_add(dict_hint.unwrap_or(0));
                hc.table.reserve_history(expected);
            }
        }
    }
    // With the `hash` feature, install an LDM producer on the hash-chain
    // backend when the overrides carry LDM settings (or clear it otherwise).
    // Unset LDM fields are passed as 0 to `LdmParams::derive`.
    #[cfg(feature = "hash")]
    if let MatcherStorage::HashChain(hc) = &mut self.storage {
        let producer = self
            .param_overrides
            .as_ref()
            .and_then(|ov| ov.ldm)
            .map(|ldm_ov| {
                let strategy_ord = ldm_strategy_ordinal(params.strategy_tag, params.lazy_depth);
                let seed = super::ldm::params::LdmParams {
                    window_log: params.window_log as u32,
                    hash_log: ldm_ov.hash_log.unwrap_or(0),
                    hash_rate_log: ldm_ov.hash_rate_log.unwrap_or(0),
                    min_match_length: ldm_ov.min_match.unwrap_or(0),
                    bucket_size_log: ldm_ov.bucket_size_log.unwrap_or(0),
                };
                super::ldm::LdmProducer::new(seed.derive(strategy_ord))
            });
        hc.set_ldm_producer(producer);
    }
    let fast_attach = matches!(next_backend, super::strategy::BackendTag::Simple)
        && self
            .reset_size_log
            .is_none_or(|log| log <= FAST_ATTACH_DICT_CUTOFF_LOG);
    // LDM overrides are part of the recorded shape only for binary-tree search.
    let active_ldm = if matches!(params.search, super::strategy::SearchMethod::BinaryTree) {
        self.param_overrides.and_then(|ov| ov.ldm)
    } else {
        None
    };
    // Recorded so capture/restore of a primed dictionary can build its key.
    self.reset_shape = Some((params, resolved_table_bits, fast_attach, active_ldm));
}
2390
/// Primes the active backend with a dictionary.
///
/// The offset history is always installed. When `dict_content` is
/// non-empty, its bytes are committed slice by slice through the normal
/// space pipeline (without emitting sequences), and the backend's
/// `max_window_size` is enlarged by the number of bytes actually committed,
/// subject to `MAX_PRIMED_WINDOW_SIZE`.
fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
    // Install the repeat-offset history on whichever backend is active.
    match self.active_backend() {
        super::strategy::BackendTag::Simple => {
            self.simple_mut().prime_offset_history(offset_hist);
        }
        super::strategy::BackendTag::Dfast => {
            self.dfast_matcher_mut().offset_hist = offset_hist
        }
        super::strategy::BackendTag::Row => self.row_matcher_mut().offset_hist = offset_hist,
        super::strategy::BackendTag::HashChain => {
            let matcher = self.hc_matcher_mut();
            matcher.table.offset_hist = offset_hist;
            matcher.table.mark_dictionary_primed();
        }
    }

    // No content: only the offset history is primed.
    if dict_content.is_empty() {
        return;
    }

    // Upper bound applied to any window enlarged for dictionary retention.
    const MAX_PRIMED_WINDOW_SIZE: usize =
        (u32::MAX as usize - crate::common::MAX_BLOCK_SIZE as usize) / 2;

    let requested_dict_budget = dict_content.len();
    // Window limit before any dictionary allowance; the final limit is
    // derived from this value below.
    let base_max_window_size = match self.active_backend() {
        super::strategy::BackendTag::Simple => self.simple_mut().max_window_size,
        super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().max_window_size,
        super::strategy::BackendTag::Row => self.row_matcher_mut().max_window_size,
        super::strategy::BackendTag::HashChain => self.hc_matcher_mut().table.max_window_size,
    };
    // Temporarily widen the window by the full dictionary length (capped)
    // while the dictionary slices are being committed.
    match self.active_backend() {
        super::strategy::BackendTag::Simple => {
            let matcher = self.simple_mut();
            matcher.max_window_size = matcher
                .max_window_size
                .saturating_add(requested_dict_budget)
                .min(MAX_PRIMED_WINDOW_SIZE);
        }
        super::strategy::BackendTag::Dfast => {
            let matcher = self.dfast_matcher_mut();
            matcher.max_window_size = matcher
                .max_window_size
                .saturating_add(requested_dict_budget)
                .min(MAX_PRIMED_WINDOW_SIZE);
        }
        super::strategy::BackendTag::Row => {
            let matcher = self.row_matcher_mut();
            matcher.max_window_size = matcher
                .max_window_size
                .saturating_add(requested_dict_budget)
                .min(MAX_PRIMED_WINDOW_SIZE);
        }
        super::strategy::BackendTag::HashChain => {
            let matcher = self.hc_matcher_mut();
            matcher.table.max_window_size = matcher
                .table
                .max_window_size
                .saturating_add(requested_dict_budget)
                .min(MAX_PRIMED_WINDOW_SIZE);
        }
    }

    let mut start = 0usize;
    let mut committed_dict_budget = 0usize;
    // A trailing slice shorter than this is not committed.
    let min_primed_tail = match self.active_backend() {
        super::strategy::BackendTag::Simple => MIN_MATCH_LEN,
        super::strategy::BackendTag::Dfast
        | super::strategy::BackendTag::Row
        | super::strategy::BackendTag::HashChain => 4,
    };
    // Feed the dictionary in `slice_size` chunks through the regular
    // get/commit path, skipping match emission for each chunk.
    while start < dict_content.len() {
        let end = (start + self.slice_size).min(dict_content.len());
        if end - start < min_primed_tail {
            break;
        }
        let mut space = self.get_next_space();
        space.clear();
        space.extend_from_slice(&dict_content[start..end]);
        self.commit_space(space);
        self.skip_matching_for_dictionary_priming();
        committed_dict_budget += end - start;
        start = end;
    }

    // Settle the final window limit: base window plus the committed bytes,
    // with the allowance capped so the total stays within
    // `MAX_PRIMED_WINDOW_SIZE`.
    let capped_retained_budget = MAX_PRIMED_WINDOW_SIZE.saturating_sub(base_max_window_size);
    let granted_retained_budget = committed_dict_budget.min(capped_retained_budget);
    let final_max_window_size = base_max_window_size.saturating_add(granted_retained_budget);
    match self.active_backend() {
        super::strategy::BackendTag::Simple => {
            self.simple_mut().max_window_size = final_max_window_size;
        }
        super::strategy::BackendTag::Dfast => {
            self.dfast_matcher_mut().max_window_size = final_max_window_size;
        }
        super::strategy::BackendTag::Row => {
            self.row_matcher_mut().max_window_size = final_max_window_size;
        }
        super::strategy::BackendTag::HashChain => {
            self.hc_matcher_mut().table.max_window_size = final_max_window_size;
        }
    }
    // The granted allowance is later given back by `retire_dictionary_budget`.
    if granted_retained_budget > 0 {
        self.dictionary_retained_budget = self
            .dictionary_retained_budget
            .saturating_add(granted_retained_budget);
    }
    // Hash-chain only: record how many dictionary bytes were primed and, in
    // binary-tree mode, run the extra `prime_dms_bt` step.
    if self.active_backend() == super::strategy::BackendTag::HashChain {
        let table = &mut self.hc_matcher_mut().table;
        table.set_dictionary_limit_from_primed_bytes(committed_dict_budget);
        if table.uses_bt {
            table.prime_dms_bt(committed_dict_budget);
        }
    }
    // The hash-chain backend was already marked primed at the top of this
    // function, so only the other three backends are marked here.
    match self.active_backend() {
        super::strategy::BackendTag::Simple => self.simple_mut().mark_dict_primed(),
        super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().mark_dict_primed(),
        super::strategy::BackendTag::Row => self.row_matcher_mut().mark_dict_primed(),
        _ => {}
    }
}
2566
/// Replaces the live backend storage with the captured primed-dictionary
/// snapshot, if one exists for the current configuration.
///
/// Returns `false` (leaving the storage untouched) when no reset shape has
/// been recorded, when no snapshot is held, or when the snapshot's key
/// differs from the key built from `level` and the current reset shape.
/// Returns `true` after a successful restore.
fn restore_primed_dictionary(&mut self, level: super::CompressionLevel) -> bool {
    let Some((params, table_bits, fast_attach, ldm)) = self.reset_shape else {
        return false;
    };
    let key = PrimedKey {
        level,
        params,
        table_bits,
        fast_attach,
        ldm,
    };
    let Some((snapshot, budget, captured_key)) = &self.primed else {
        return false;
    };
    if *captured_key != key {
        return false;
    }
    let budget = *budget;
    match (&mut self.storage, snapshot) {
        // Simple-to-simple restores go through `clone_from` on the live matcher.
        (MatcherStorage::Simple(live), MatcherStorage::Simple(snap)) => {
            live.clone_from(snap);
        }
        (live, snapshot_storage) => {
            let mut storage = snapshot_storage.clone();
            if let MatcherStorage::HashChain(hc) = &mut storage {
                // NOTE(review): binary-tree snapshots are captured with their
                // tables taken out (see `capture_primed_dictionary`);
                // presumably `ensure_tables` re-creates them — confirm.
                hc.table.ensure_tables();
            }
            // With the `hash` feature, the LDM producer of the live
            // hash-chain storage is moved into the restored storage.
            #[cfg(feature = "hash")]
            {
                let fresh_ldm = if let MatcherStorage::HashChain(hc) = live {
                    hc.take_ldm_producer()
                } else {
                    None
                };
                if let MatcherStorage::HashChain(hc) = &mut storage {
                    hc.set_ldm_producer(fresh_ldm);
                }
            }
            *live = storage;
        }
    }
    // The retained budget recorded at capture time comes back with the snapshot.
    self.dictionary_retained_budget = budget;
    true
}
2642
/// Snapshots the current backend storage, the retained dictionary budget,
/// and a key built from `level` plus the recorded reset shape into
/// `self.primed`. Does nothing when no reset shape has been recorded.
fn capture_primed_dictionary(&mut self, level: super::CompressionLevel) {
    let Some((params, table_bits, fast_attach, ldm)) = self.reset_shape else {
        return;
    };
    let key = PrimedKey {
        level,
        params,
        table_bits,
        fast_attach,
        ldm,
    };
    // Hash-chain storage running in binary-tree mode is snapshotted without
    // its hash/chain/hash3 tables (and without the LDM producer).
    let bt_decoupled = matches!(
        &self.storage,
        MatcherStorage::HashChain(hc) if hc.table.uses_bt
    );
    if bt_decoupled {
        let MatcherStorage::HashChain(hc) = &mut self.storage else {
            unreachable!("bt_decoupled implies HashChain storage");
        };
        // Move the tables out so the clone below does not copy them.
        let hash_table = core::mem::take(&mut hc.table.hash_table);
        let chain_table = core::mem::take(&mut hc.table.chain_table);
        let hash3_table = core::mem::take(&mut hc.table.hash3_table);
        #[cfg(feature = "hash")]
        let ldm_producer = hc.take_ldm_producer();
        let snapshot = self.storage.clone();
        // Put everything back into the live storage after cloning.
        let MatcherStorage::HashChain(hc) = &mut self.storage else {
            unreachable!("storage variant is stable across the take/put");
        };
        hc.table.hash_table = hash_table;
        hc.table.chain_table = chain_table;
        hc.table.hash3_table = hash3_table;
        #[cfg(feature = "hash")]
        hc.set_ldm_producer(ldm_producer);
        self.primed = Some((snapshot, self.dictionary_retained_budget, key));
    } else {
        // Every other configuration is snapshotted with a full clone.
        self.primed = Some((self.storage.clone(), self.dictionary_retained_budget, key));
    }
}
2702
2703 fn invalidate_primed_dictionary(&mut self) {
2704 self.primed = None;
2705 match self.active_backend() {
2710 super::strategy::BackendTag::Simple => self.simple_mut().invalidate_dict_cache(),
2711 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().invalidate_dict_cache(),
2712 super::strategy::BackendTag::Row => self.row_matcher_mut().invalidate_dict_cache(),
2717 super::strategy::BackendTag::HashChain => {
2722 self.hc_matcher_mut().table.dms.invalidate();
2723 }
2724 }
2725 }
2726
2727 fn seed_dictionary_entropy(
2728 &mut self,
2729 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
2730 ll: Option<&crate::fse::fse_encoder::FSETable>,
2731 ml: Option<&crate::fse::fse_encoder::FSETable>,
2732 of: Option<&crate::fse::fse_encoder::FSETable>,
2733 ) {
2734 if self.active_backend() == super::strategy::BackendTag::HashChain {
2735 self.hc_matcher_mut()
2736 .seed_dictionary_entropy(huff, ll, ml, of);
2737 }
2738 }
2739
/// Returns `reported_window_size` (set by `reset`) widened to `u64`.
fn window_size(&self) -> u64 {
    self.reported_window_size as u64
}
2743
2744 fn get_next_space(&mut self) -> Vec<u8> {
2745 if let Some(mut space) = self.vec_pool.pop() {
2746 if space.len() > self.slice_size {
2747 space.truncate(self.slice_size);
2748 }
2749 if space.len() < self.slice_size {
2750 space.resize(self.slice_size, 0);
2751 }
2752 return space;
2753 }
2754 alloc::vec![0; self.slice_size]
2755 }
2756
2757 fn get_last_space(&mut self) -> &[u8] {
2758 match &self.storage {
2759 MatcherStorage::Simple(m) => m.last_committed_space(),
2760 MatcherStorage::Dfast(m) => m.get_last_space(),
2761 MatcherStorage::Row(m) => m.get_last_space(),
2762 MatcherStorage::HashChain(m) => m.table.get_last_space(),
2763 }
2764 }
2765
2766 fn commit_space(&mut self, space: Vec<u8>) {
2767 let mut evicted_bytes = 0usize;
2768 let vec_pool = &mut self.vec_pool;
2774 match &mut self.storage {
2775 MatcherStorage::Simple(m) => {
2776 let pre = m.history_len_for_eviction_accounting();
2786 m.accept_data(space);
2787 let post = m.history_len_for_eviction_accounting();
2788 evicted_bytes += pre.saturating_sub(post);
2799 }
2800 MatcherStorage::Dfast(m) => {
2801 let pre = m.window_size;
2823 let space_len = space.len();
2824 m.add_data(space, |data| {
2825 vec_pool.push(data);
2833 });
2834 evicted_bytes += (pre + space_len).saturating_sub(m.window_size);
2837 }
2838 MatcherStorage::Row(m) => {
2839 let pre = m.window_size;
2848 let space_len = space.len();
2849 m.add_data(space, |data| {
2850 vec_pool.push(data);
2855 });
2856 evicted_bytes += (pre + space_len).saturating_sub(m.window_size);
2859 }
2860 MatcherStorage::HashChain(m) => {
2861 let pre = m.table.window_size;
2868 let space_len = space.len();
2869 m.table.add_data(space, |data| {
2870 vec_pool.push(data);
2880 });
2881 evicted_bytes += (pre + space_len).saturating_sub(m.table.window_size);
2884 }
2885 }
2886 if self.retire_dictionary_budget(evicted_bytes) {
2896 self.trim_after_budget_retire();
2897 }
2898 }
2899
/// Runs match finding over the current block, passing each produced
/// sequence to `handle_sequence`.
///
/// A block staged via `set_borrowed_block` takes precedence: it is consumed
/// here and dispatched to the active backend's borrowed entry point.
/// Otherwise dispatch is by `self.search` (and, for binary-tree search, by
/// `self.strategy_tag`).
///
/// # Panics
///
/// Hits `unreachable!` when the search method and strategy tag are an
/// unsupported combination (see the arms below).
fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
    use super::strategy::{self, StrategyTag};
    // Borrowed path: `take()` clears the pending range as it is consumed.
    if let Some((block_start, block_end)) = self.borrowed_pending.take() {
        match self.active_backend() {
            super::strategy::BackendTag::Simple => self.simple_mut().start_matching_borrowed(
                block_start,
                block_end,
                &mut handle_sequence,
            ),
            super::strategy::BackendTag::Dfast => self
                .dfast_matcher_mut()
                .start_matching_borrowed(block_start, block_end, &mut handle_sequence),
            super::strategy::BackendTag::Row => {
                // The row backend is told whether to parse greedily.
                let greedy = self.parse == super::strategy::ParseMode::Greedy;
                self.row_matcher_mut().start_matching_borrowed(
                    block_start,
                    block_end,
                    greedy,
                    &mut handle_sequence,
                );
            }
            super::strategy::BackendTag::HashChain => match self.search {
                super::strategy::SearchMethod::HashChain => self
                    .hc_matcher_mut()
                    .start_matching_lazy_borrowed(block_start, block_end, &mut handle_sequence),
                super::strategy::SearchMethod::BinaryTree => {
                    match self.strategy_tag {
                        // NOTE(review): unlike the other borrowed arms, this
                        // call is not given `block_start`/`block_end` —
                        // presumably it reads the range staged on the table
                        // by `set_borrowed_block`; confirm.
                        StrategyTag::Btlazy2 => self
                            .hc_matcher_mut()
                            .start_matching_btlazy2(&mut handle_sequence),
                        other => unreachable!(
                            "borrowed BinaryTree scan is only supported for Btlazy2, got {other:?}"
                        ),
                    }
                }
                other => {
                    unreachable!("HashChain backend with unexpected search {other:?}")
                }
            },
        }
        return;
    }
    use super::strategy::SearchMethod;
    // Owned path: dispatch on the configured search method.
    match self.search {
        SearchMethod::Fast => {
            self.simple_mut().start_matching(&mut handle_sequence);
            self.recycle_simple_space();
        }
        SearchMethod::DoubleFast => {
            self.dfast_matcher_mut()
                .start_matching(&mut handle_sequence);
        }
        SearchMethod::RowHash => {
            let greedy = self.parse == super::strategy::ParseMode::Greedy;
            let row = self.row_matcher_mut();
            if greedy {
                row.start_matching_greedy(&mut handle_sequence);
            } else {
                row.start_matching(&mut handle_sequence);
            }
        }
        SearchMethod::HashChain => {
            self.hc_matcher_mut()
                .start_matching_lazy(&mut handle_sequence);
        }
        // Binary-tree search: Btlazy2 has its own entry point; the other
        // BT strategies go through the generic `compress_block`.
        SearchMethod::BinaryTree => match self.strategy_tag {
            StrategyTag::Btlazy2 => self
                .hc_matcher_mut()
                .start_matching_btlazy2(&mut handle_sequence),
            StrategyTag::BtOpt => self.compress_block::<strategy::BtOpt>(&mut handle_sequence),
            StrategyTag::BtUltra => {
                self.compress_block::<strategy::BtUltra>(&mut handle_sequence)
            }
            StrategyTag::BtUltra2 => {
                self.compress_block::<strategy::BtUltra2>(&mut handle_sequence)
            }
            _ => unreachable!(
                "SearchMethod::BinaryTree requires a BT strategy tag (Btlazy2/BtOpt/BtUltra/BtUltra2)"
            ),
        },
    }
}
3018
/// Skips match finding for the current block without an incompressibility
/// hint; equivalent to `skip_matching_with_hint(None)`.
fn skip_matching(&mut self) {
    self.skip_matching_with_hint(None);
}
3022
3023 fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
3024 if let Some((block_start, block_end)) = self.borrowed_pending.take() {
3029 match self.active_backend() {
3030 super::strategy::BackendTag::Simple => self.simple_mut().skip_matching_borrowed(
3031 block_start,
3032 block_end,
3033 incompressible_hint,
3034 ),
3035 super::strategy::BackendTag::Dfast => self
3036 .dfast_matcher_mut()
3037 .skip_matching_borrowed(block_start, block_end, incompressible_hint),
3038 super::strategy::BackendTag::Row => self.row_matcher_mut().skip_matching_borrowed(
3039 block_start,
3040 block_end,
3041 incompressible_hint,
3042 ),
3043 super::strategy::BackendTag::HashChain => self
3044 .hc_matcher_mut()
3045 .skip_matching_borrowed(block_start, block_end, incompressible_hint),
3046 }
3047 return;
3048 }
3049 match self.active_backend() {
3050 super::strategy::BackendTag::Simple => {
3051 self.simple_mut()
3052 .skip_matching_with_hint(incompressible_hint);
3053 self.recycle_simple_space();
3054 }
3055 super::strategy::BackendTag::Dfast => {
3056 self.dfast_matcher_mut().skip_matching(incompressible_hint)
3057 }
3058 super::strategy::BackendTag::Row => self
3059 .row_matcher_mut()
3060 .skip_matching_with_hint(incompressible_hint),
3061 super::strategy::BackendTag::HashChain => {
3062 self.hc_matcher_mut().skip_matching(incompressible_hint)
3063 }
3064 }
3065 }
3066}
3067
impl MatchGeneratorDriver {
    /// Runs the hash-chain backend's strategy-generic matcher for strategy
    /// `S`, passing each produced sequence to `handle_sequence`.
    ///
    /// In debug builds this asserts that `S` targets the hash-chain backend
    /// and uses the binary tree.
    ///
    /// # Panics
    ///
    /// Panics if the storage does not currently hold the hash-chain backend.
    fn compress_block<S: super::strategy::Strategy>(
        &mut self,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        debug_assert_eq!(S::BACKEND, super::strategy::BackendTag::HashChain);
        debug_assert!(
            S::USE_BT,
            "compress_block only handles the optimal (BT) path"
        );
        self.hc_matcher_mut()
            .start_matching_strategy::<S>(handle_sequence);
    }
}
3091
/// Search-structure state carried by the hash-chain match generator.
#[derive(Clone)]
pub(crate) enum HcBackend {
    /// Plain hash-chain mode; carries no extra state.
    Hc,
    /// Binary-tree mode, owning a heap-allocated `BtMatcher`.
    // NOTE(review): presumably boxed to keep the enum small — confirm.
    Bt(alloc::boxed::Box<super::bt::BtMatcher>),
}
3114
3115impl HcBackend {
3116 fn heap_size(&self) -> usize {
3119 match self {
3120 Self::Hc => 0,
3121 Self::Bt(bt) => core::mem::size_of::<super::bt::BtMatcher>() + bt.heap_size(),
3122 }
3123 }
3124
3125 #[inline(always)]
3132 pub(crate) fn bt_mut(&mut self) -> &mut super::bt::BtMatcher {
3133 match self {
3134 Self::Bt(bt) => bt,
3135 Self::Hc => unreachable!("BT-only accessor called in HC mode"),
3136 }
3137 }
3138}
3139
/// Match generator behind `MatcherStorage::HashChain`; serves both the
/// hash-chain and the binary-tree search methods.
#[derive(Clone)]
struct HcMatchGenerator {
    /// Match-table storage: the driver reads and writes its window limits,
    /// offset history, and hash/chain/hash3 tables through this field.
    table: super::match_table::storage::MatchTable,
    /// Hash-chain matcher state; the driver sets its `lazy_depth` on reset.
    hc: super::hc::HcMatcher,
    /// Hash-chain or binary-tree specific state.
    backend: HcBackend,
    /// Strategy tag stored on the generator.
    // NOTE(review): presumably the tag passed to `configure` — confirm.
    strategy_tag: super::strategy::StrategyTag,
}
3169
3170macro_rules! bt_insert_step_no_rebase_body {
3186 ($table:expr, $search_depth:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
3187 let idx = $abs_pos - $table.history_abs_start;
3188 let concat: &[u8] = unsafe {
3193 let lh = $table.live_history();
3194 core::slice::from_raw_parts(lh.as_ptr(), lh.len())
3195 };
3196 if idx + 8 > concat.len() {
3197 return 1;
3198 }
3199 debug_assert!(
3200 $abs_pos <= $current_abs_end,
3201 "BT walker called past current block end"
3202 );
3203 let tail_limit = $current_abs_end - $abs_pos;
3204 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
3205 concat,
3206 idx,
3207 $table.hash_log,
3208 $table.search_mls,
3209 );
3210 #[cfg(all(
3218 target_feature = "sse",
3219 any(target_arch = "x86", target_arch = "x86_64")
3220 ))]
3221 {
3222 #[cfg(target_arch = "x86")]
3223 use core::arch::x86::{_MM_HINT_T0, _mm_prefetch};
3224 #[cfg(target_arch = "x86_64")]
3225 use core::arch::x86_64::{_MM_HINT_T0, _mm_prefetch};
3226 unsafe {
3229 _mm_prefetch($table.hash_table.as_ptr().add(hash).cast(), _MM_HINT_T0);
3230 }
3231 if idx + 1 + 8 <= concat.len() {
3237 let hash_next =
3238 $crate::encoding::match_table::storage::MatchTable::hash_position_at(
3239 concat,
3240 idx + 1,
3241 $table.hash_log,
3242 $table.search_mls,
3243 );
3244 unsafe {
3247 _mm_prefetch(
3248 $table.hash_table.as_ptr().add(hash_next).cast(),
3249 _MM_HINT_T0,
3250 );
3251 }
3252 }
3253 }
3254 let Some(relative_pos) = $table.relative_position($abs_pos) else {
3255 return 1;
3256 };
3257 let stored = relative_pos + 1;
3258 let bt_mask = $table.bt_mask();
3259 let bt_low = $abs_pos.saturating_sub(bt_mask);
3265 let chain_ptr = $table.chain_table.as_mut_ptr();
3269 debug_assert_eq!($table.chain_table.len(), 2 << $table.bt_log());
3270 let window_low = $table.window_low_abs_for_target($target_abs);
3271 let mut match_end_abs = $abs_pos + 9;
3280 let mut best_len = 8usize;
3281 let mut compares_left = $search_depth;
3282 let mut common_length_smaller = 0usize;
3283 let mut common_length_larger = 0usize;
3284 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
3285 let mut smaller_slot = pair_idx;
3286 let mut larger_slot = pair_idx + 1;
3287 let mut match_stored = $table.hash_table[hash];
3288 $table.hash_table[hash] = stored;
3289
3290 while compares_left > 0 {
3291 if match_stored == $crate::encoding::match_table::storage::HC_EMPTY {
3292 break;
3293 }
3294 let Some(candidate_abs) = ($table.position_base + (match_stored as usize - 1))
3304 .checked_sub($table.index_shift)
3305 else {
3306 break;
3307 };
3308 if candidate_abs < window_low || candidate_abs >= $abs_pos {
3309 break;
3310 }
3311 compares_left -= 1;
3312
3313 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
3314 let next_smaller = unsafe { *chain_ptr.add(next_pair_idx) };
3318 let next_larger = unsafe { *chain_ptr.add(next_pair_idx + 1) };
3319 let seed_len = common_length_smaller.min(common_length_larger);
3320 let candidate_idx = candidate_abs - $table.history_abs_start;
3321 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
3326
3327 if match_len > best_len {
3328 best_len = match_len;
3329 let candidate_end = candidate_abs + match_len;
3333 if candidate_end > match_end_abs {
3334 match_end_abs = candidate_end;
3335 }
3336 }
3337
3338 if match_len >= tail_limit {
3339 break;
3340 }
3341
3342 let candidate_next = candidate_idx + match_len;
3343 let current_next = idx + match_len;
3344 if unsafe {
3348 *concat.get_unchecked(candidate_next) < *concat.get_unchecked(current_next)
3349 } {
3350 unsafe { *chain_ptr.add(smaller_slot) = match_stored };
3354 common_length_smaller = match_len;
3355 if candidate_abs <= bt_low {
3356 smaller_slot = usize::MAX;
3357 break;
3358 }
3359 smaller_slot = next_pair_idx + 1;
3360 match_stored = next_larger;
3361 } else {
3362 unsafe { *chain_ptr.add(larger_slot) = match_stored };
3364 common_length_larger = match_len;
3365 if candidate_abs <= bt_low {
3366 larger_slot = usize::MAX;
3367 break;
3368 }
3369 larger_slot = next_pair_idx;
3370 match_stored = next_smaller;
3371 }
3372 }
3373
3374 if smaller_slot != usize::MAX {
3377 unsafe {
3378 *chain_ptr.add(smaller_slot) = $crate::encoding::match_table::storage::HC_EMPTY
3379 };
3380 }
3381 if larger_slot != usize::MAX {
3382 unsafe {
3383 *chain_ptr.add(larger_slot) = $crate::encoding::match_table::storage::HC_EMPTY
3384 };
3385 }
3386
3387 let speed_positions = if best_len > 384 {
3388 (best_len - 384).min(192)
3389 } else {
3390 0
3391 };
3392 speed_positions.max(match_end_abs - ($abs_pos + 8))
3402 }};
3403}
3404pub(crate) use bt_insert_step_no_rebase_body;
3405
/// Maps a match offset to the "offbase" code used by the btlazy2 gain heuristic.
///
/// Repeat offsets receive the small codes `1..=3`; any other offset becomes
/// `offset + 3`.
///
/// * `ll0 == false`: `reps[0]`, `reps[1]`, `reps[2]` map to `1`, `2`, `3`.
/// * `ll0 == true` (the sequence would carry no literals): `reps[1]` maps to `1`,
///   `reps[2]` to `2`, and `reps[0] - 1` to `3` (only when `reps[0] > 1`).
///
/// The result is never `0`. An offset that does not fit in `u32`, or whose
/// `+ 3` would overflow, saturates to `u32::MAX` instead of being truncated or
/// wrapped, so an oversized offset can never alias a repeat offset.
#[inline]
fn btlazy2_offbase(offset: usize, reps: [u32; 3], ll0: bool) -> u32 {
    // An offset wider than 32 bits cannot equal any stored repeat offset.
    let o = match u32::try_from(offset) {
        Ok(o) => o,
        Err(_) => return u32::MAX,
    };
    let rep_code = if ll0 {
        // With zero literals the repeat-offset slots are shifted by one and the
        // third code stands for `reps[0] - 1`.
        if o == reps[1] {
            Some(1)
        } else if o == reps[2] {
            Some(2)
        } else if reps[0] > 1 && o == reps[0] - 1 {
            Some(3)
        } else {
            None
        }
    } else if o == reps[0] {
        Some(1)
    } else if o == reps[1] {
        Some(2)
    } else if o == reps[2] {
        Some(3)
    } else {
        None
    };
    match rep_code {
        Some(code) => code,
        // Saturate so the code never wraps around to a tiny (or zero) value.
        None => o.saturating_add(3),
    }
}
3455
3456#[inline]
3460fn btlazy2_gain(match_len: usize, offset: usize, reps: [u32; 3], ll0: bool) -> i64 {
3461 let offbase = btlazy2_offbase(offset, reps, ll0);
3462 (match_len as i64) * 4 - (31 - offbase.leading_zeros()) as i64
3463}
3464
/// Body of the btlazy2 block parser: a greedy parse with bounded lazy
/// look-ahead driven by [`btlazy2_gain`].
///
/// Macro parameters:
/// * `$self` - the matcher; the body reads and writes `$self.table`,
///   `$self.hc.lazy_depth` and `$self.backend.bt_mut().opt_candidates_scratch`.
/// * `$handle_sequence` - callable invoked with `Sequence::Triple` for every
///   emitted match and with `Sequence::Literals` for trailing literals.
/// * `$collect` - name of the (unsafe) candidate-collection method on `$self`,
///   instantiated as `::<Btlazy2, true>`.
/// * `$cmf` - path of the (unsafe) match-length function, called as
///   `$cmf(buffer, current_idx, candidate_idx, limit, seed_len)`.
///
/// The expansion uses `return` to leave the *enclosing function* when the
/// current block is empty.
macro_rules! start_matching_btlazy2_body {
    ($self:ident, $handle_sequence:ident, $collect:ident, $cmf:path $(,)?) => {{
        $self.table.ensure_tables();
        let (current_abs_start, current_len) = $self.table.current_block_range();
        // Nothing to parse for an empty block.
        if current_len == 0 {
            return;
        }
        let current_ptr = $self.table.get_last_space().as_ptr();
        // SAFETY: NOTE(review) - rebuilds the current-block slice from a raw pointer
        // so it is no longer tied to the borrow of `$self.table`. This presumes the
        // backing storage is neither moved nor freed while the parse runs and that
        // `current_len` does not exceed the length of `get_last_space()`; confirm
        // against the table implementation.
        let current: &[u8] = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
        let history_abs_start = $self.table.history_abs_start;
        // SAFETY: NOTE(review) - same borrow-detaching trick for the live history
        // buffer; pointer and length both come from the same `live_history()` slice.
        let concat_full: &[u8] = unsafe {
            let lh = $self.table.live_history();
            core::slice::from_raw_parts(lh.as_ptr(), lh.len())
        };
        let current_abs_end = current_abs_start + current_len;
        $self
            .table
            .apply_limited_update_after_long_match(current_abs_start);
        $self
            .table
            .backfill_boundary_positions(current_abs_start, current_abs_end);

        let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::Btlazy2>();
        // Borrow the scratch vector for the duration of the parse; it is handed
        // back at the end of the body.
        let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);

        let depth = $self.hc.lazy_depth as usize;
        let mut pos = 0usize;
        let mut literals_start = 0usize;

        // Selects the best `(match_len, offset)` at block-relative position `$p`.
        // Returns `(0, 0)` when no candidate reaches `HC_OPT_MIN_MATCH_LEN`.
        macro_rules! bt_select {
            ($p:expr) => {{
                let sel_pos: usize = $p;
                // True when a match at `sel_pos` would be emitted with zero literals.
                let ll0 = sel_pos == literals_start;
                let sel_abs = current_abs_start + sel_pos;
                candidates.clear();
                let query = HcCandidateQuery {
                    reps: $self.table.offset_hist,
                    lit_len: sel_pos - literals_start,
                    ldm_candidate: None,
                };
                // SAFETY: NOTE(review) - the contract of `$collect` is not visible
                // here; `sel_abs` lies inside the current block because callers only
                // pass positions below `current_len`.
                unsafe {
                    $self.$collect::<super::strategy::Btlazy2, true>(
                        sel_abs,
                        current_abs_end,
                        profile,
                        query,
                        &mut candidates,
                    );
                }
                let reps = $self.table.offset_hist;
                let mut sel_ml = 0usize;
                let mut sel_off = 0usize;
                let mut sel_gain = i64::MIN;
                for c in candidates.iter() {
                    // Never let a match run past the end of the current block.
                    let ml = c.match_len.min(current_len - sel_pos);
                    if ml < HC_OPT_MIN_MATCH_LEN {
                        continue;
                    }
                    let g = btlazy2_gain(ml, c.offset, reps, ll0);
                    if g > sel_gain {
                        sel_gain = g;
                        sel_ml = ml;
                        sel_off = c.offset;
                    }
                }
                // Explicitly probe the repeat offset that receives offbase code 1
                // (`reps[1]` when there are no literals, `reps[0]` otherwise).
                let sel_idx = sel_abs - history_abs_start;
                let probe_rep = if ll0 {
                    reps[1] as usize
                } else {
                    reps[0] as usize
                };
                if probe_rep != 0 && sel_idx >= probe_rep {
                    let tail = current_len - sel_pos;
                    // SAFETY: NOTE(review) - `sel_idx - probe_rep` cannot underflow
                    // (guarded above) and `tail` bounds the comparison to the current
                    // block; the remaining requirements of `$cmf` are not visible here.
                    let rep_ml =
                        unsafe { $cmf(concat_full, sel_idx, sel_idx - probe_rep, tail, 0) };
                    if rep_ml >= HC_OPT_MIN_MATCH_LEN
                        && btlazy2_gain(rep_ml, probe_rep, reps, ll0) > sel_gain
                    {
                        sel_ml = rep_ml;
                        sel_off = probe_rep;
                    }
                }
                (sel_ml, sel_off)
            }};
        }

        while pos + HC_OPT_MIN_MATCH_LEN <= current_len {
            let (mut best_ml, mut best_off) = bt_select!(pos);
            if best_ml < HC_OPT_MIN_MATCH_LEN {
                pos += 1;
                continue;
            }
            let mut start = pos;
            // Lazy evaluation: look one byte ahead, up to `depth` times, and switch
            // to the later match only when its gain beats the current one by a margin.
            let mut d = 0usize;
            while d < depth && start + 1 + HC_OPT_MIN_MATCH_LEN <= current_len {
                let look = start + 1;
                let (ml2, off2) = bt_select!(look);
                if ml2 < HC_OPT_MIN_MATCH_LEN {
                    break;
                }
                let reps = $self.table.offset_hist;
                // The first deferral needs a smaller advantage than later ones.
                let margin = if d == 0 { 4 } else { 7 };
                let gain1 = btlazy2_gain(best_ml, best_off, reps, start == literals_start) + margin;
                // `look` is always past `literals_start`, so the later match would
                // carry at least one literal.
                let gain2 = btlazy2_gain(ml2, off2, reps, false);
                if gain2 > gain1 {
                    best_ml = ml2;
                    best_off = off2;
                    start = look;
                    d += 1;
                } else {
                    break;
                }
            }
            let lit_len = start - literals_start;
            let literals = &current[literals_start..start];
            $handle_sequence(Sequence::Triple {
                literals,
                offset: best_off,
                match_len: best_ml,
            });
            // Only the side effect on the offset history is needed here.
            let _ = encode_offset_with_history(
                best_off as u32,
                lit_len as u32,
                &mut $self.table.offset_hist,
            );
            pos = start + best_ml;
            literals_start = pos;
        }

        // Flush whatever is left after the last match as plain literals.
        if literals_start < current_len {
            $handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
        $self.backend.bt_mut().opt_candidates_scratch = candidates;
    }};
}
3645
/// Body of the optimal-parse planner: a forward dynamic-programming pass over
/// the current block followed by a reverse traversal that emits the chosen
/// sequences into `$out`.
///
/// Macro parameters:
/// * `$self` - the matcher (`$self.hc`, `$self.backend.bt_mut()` are used).
/// * `$strategy_ty` - strategy type; `USE_BT` and `OPT_LEVEL` are read from it.
/// * `$current` - byte slice of the region being planned (indexed by position).
/// * `$current_abs_start` / `$current_len` - absolute start and length of it.
/// * `$initial_state` - provides `reps`, `litlen`, `block_offset`, `profile`.
/// * `$stats` - statistics handed to every pricing helper.
/// * `$out` - collection receiving `HcOptimalSequence` values via `push`.
/// * `$collect` - name of the (unsafe) candidate-collection method on `$self`.
///
/// The expansion also refers to `ACCURATE_PRICE` and `FAVOR_SMALL_OFFSETS`,
/// which must be in scope at the expansion site.
///
/// Every exit path (including the early `return`s, which leave the *enclosing
/// function*) goes through `finish_optimal_plan`, handing back the scratch
/// buffers together with a 4-tuple whose fields are, in order: price, repeat
/// offsets, trailing literal length and a position/length value.
/// NOTE(review): the exact meaning of the tuple is defined by
/// `finish_optimal_plan`, which is not visible here.
macro_rules! build_optimal_plan_impl_body {
    (
        $self:expr,
        $strategy_ty:ty,
        $current:ident,
        $current_abs_start:ident,
        $current_len:ident,
        $initial_state:ident,
        $stats:ident,
        $out:ident,
        $collect:ident $(,)?
    ) => {{
        let current_abs_end = $current_abs_start + $current_len;
        let min_match_len = HC_OPT_MIN_MATCH_LEN;
        // Highest node index the forward pass may extend matches to.
        let frontier_limit = $current_len.min(HC_OPT_NUM - 1);
        let initial_reps = $initial_state.reps;
        let initial_litlen = $initial_state.litlen;
        let ldm_block_offset = $initial_state.block_offset;
        let mut profile = $initial_state.profile;
        profile.sufficient_match_len = $self.hc.sufficient_match_len_for_pass(profile);
        debug_assert!(
            <$strategy_ty as super::strategy::Strategy>::USE_BT,
            "build_optimal_plan_impl_body called on non-BT strategy"
        );
        // Strategy-level switches, resolved at compile time.
        let abort_on_worse_match: bool =
            <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL == 0;
        let opt_level: bool = <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL >= 2;

        // ---- Scratch buffers: taken from the backend, returned on every exit ----
        let mut nodes = core::mem::take(&mut $self.backend.bt_mut().opt_nodes_scratch);
        let frontier_buffer_size = frontier_limit + 2;
        if nodes.len() < HC_OPT_NODE_LEN {
            nodes = alloc::vec![HcOptimalNode::default(); HC_OPT_NODE_LEN].into_boxed_slice();
        }
        let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);
        candidates.clear();
        if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
            candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
        }
        let mut store = core::mem::take(&mut $self.backend.bt_mut().opt_store_scratch);
        store.clear();
        let mut price_arena = core::mem::take(&mut $self.backend.bt_mut().opt_price_arena);
        if price_arena.len() < HC_OPT_PRICE_ARENA_LEN {
            price_arena = alloc::vec![[0u32; 2]; HC_OPT_PRICE_ARENA_LEN].into_boxed_slice();
        }
        let arena_base = price_arena.as_mut_ptr();
        // SAFETY: NOTE(review) - the two caches are the windows
        // `[0, STRIDE)` and `[STRIDE, 2 * STRIDE)` of `price_arena`, so they do not
        // overlap. This presumes `HC_OPT_PRICE_ARENA_LEN >= 2 * HC_OPT_PRICE_STRIDE`
        // (constants not visible here) and that `price_arena` is not otherwise
        // accessed while the caches are alive.
        let mut ll_cache: &mut [[u32; 2]] =
            unsafe { core::slice::from_raw_parts_mut(arena_base, HC_OPT_PRICE_STRIDE) };
        let mut ml_cache: &mut [[u32; 2]] = unsafe {
            core::slice::from_raw_parts_mut(arena_base.add(HC_OPT_PRICE_STRIDE), HC_OPT_PRICE_STRIDE)
        };

        // Bump the three cache stamps; `.max(1)` keeps a stamp from ever being 0
        // after the counter wraps.
        $self.backend.bt_mut().opt_ll_price_stamp = $self
            .backend
            .bt_mut()
            .opt_ll_price_stamp
            .wrapping_add(1)
            .max(1);
        let ll_price_stamp = $self.backend.bt_mut().opt_ll_price_stamp;
        $self.backend.bt_mut().opt_lit_price_stamp = $self
            .backend
            .bt_mut()
            .opt_lit_price_stamp
            .wrapping_add(1)
            .max(1);
        let lit_price_stamp = $self.backend.bt_mut().opt_lit_price_stamp;
        $self.backend.bt_mut().opt_ml_price_stamp = $self
            .backend
            .bt_mut()
            .opt_ml_price_stamp
            .wrapping_add(1)
            .max(1);
        let ml_price_stamp = $self.backend.bt_mut().opt_ml_price_stamp;

        // Node 0 is the starting state: only the pending literal run is priced.
        nodes[0] = HcOptimalNode {
            price: BtMatcher::cached_lit_length_price(
                profile,
                $stats,
                initial_litlen,
                &mut ll_cache,
                ll_price_stamp,
            ),
            litlen: initial_litlen as u32,
            reps: initial_reps,
            ..HcOptimalNode::default()
        };
        let sufficient_len = profile.sufficient_match_len;
        // Prices of literal-length 0 and 1, reused throughout the forward pass.
        let ll0_price = BtMatcher::cached_lit_length_price(
            profile,
            $stats,
            0,
            &mut ll_cache,
            ll_price_stamp,
        );
        let ll1_price = BtMatcher::cached_lit_length_price(
            profile,
            $stats,
            1,
            &mut ll_cache,
            ll_price_stamp,
        );
        let mut pos = 1usize;
        let mut last_pos = 0usize;
        // Set when a match is long enough to end the forward pass immediately.
        let mut forced_end: Option<usize> = None;
        let mut forced_end_state: Option<HcOptimalNode> = None;
        let mut seed_forced_shortest_path = false;

        // ---- Long-distance-match (LDM) cursor ----
        let mut opt_ldm = HcOptLdmState {
            seq_store: HcRawSeqStore {
                pos: 0,
                pos_in_sequence: 0,
                size: $self.backend.bt_mut().ldm_sequences.len(),
            },
            ..HcOptLdmState::default()
        };
        let has_ldm = !$self.backend.bt_mut().ldm_sequences.is_empty();
        if has_ldm {
            if ldm_block_offset > 0 {
                $self
                    .backend
                    .bt_mut()
                    .ldm_skip_raw_seq_store_bytes(&mut opt_ldm.seq_store, ldm_block_offset);
            }
            $self
                .backend
                .bt_mut()
                .ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
        }

        // ---- Seed phase: matches starting at position 0 ----
        if $current_len >= min_match_len {
            let seed_ldm = if has_ldm {
                $self.backend.bt_mut().ldm_process_match_candidate(
                    &mut opt_ldm,
                    0,
                    $current_len,
                    min_match_len,
                )
            } else {
                None
            };
            candidates.clear();
            // SAFETY: NOTE(review) - the contract of `$collect` is not visible here.
            unsafe {
                $self.$collect::<$strategy_ty, true>(
                    $current_abs_start,
                    current_abs_end,
                    profile,
                    HcCandidateQuery {
                        reps: initial_reps,
                        lit_len: initial_litlen,
                        ldm_candidate: seed_ldm,
                    },
                    &mut candidates,
                )
            };
            if !candidates.is_empty() {
                // Positions shorter than the minimum match can only be reached by
                // literals: reset them and record the literal run length.
                last_pos = (min_match_len - 1).min(frontier_limit);
                for p in 1..min_match_len.min(frontier_buffer_size) {
                    BtMatcher::reset_opt_node(&mut nodes[p]);
                    let seed_litlen = initial_litlen
                        .checked_add(p)
                        .and_then(|s| u32::try_from(s).ok())
                        .expect("optimal parser seed litlen out of u32 range");
                    nodes[p].litlen = seed_litlen;
                }
            }

            // The last candidate is treated as the longest one; if it exceeds the
            // "sufficient" length, take it outright and skip the forward pass.
            if let Some(candidate) = candidates.last() {
                let longest_len = candidate.match_len.min($current_len);
                if longest_len > sufficient_len {
                    let off_base = BtMatcher::encode_offset_base_with_reps(
                        candidate.offset as u32,
                        initial_litlen,
                        initial_reps,
                    );
                    let off_price = profile
                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
                    let ml_price = BtMatcher::cached_match_length_price(
                        profile,
                        $stats,
                        longest_len,
                        &mut ml_cache,
                        ml_price_stamp,
                    );
                    let seq_cost = BtMatcher::add_prices(
                        ll0_price,
                        profile.match_price_from_parts(off_price, ml_price, $stats),
                    );
                    let forced_price = BtMatcher::add_prices(nodes[0].price, seq_cost);
                    let forced_state = HcOptimalNode {
                        price: forced_price,
                        off: candidate.offset as u32,
                        mlen: longest_len as u32,
                        litlen: 0,
                        reps: initial_reps,
                    };
                    if longest_len < frontier_buffer_size && forced_price < nodes[longest_len].price {
                        nodes[longest_len] = forced_state;
                    }
                    forced_end = Some(longest_len);
                    forced_end_state = Some(forced_state);
                    seed_forced_shortest_path = true;
                }
            }
            if !seed_forced_shortest_path {
                // Relax every reachable end position for each candidate. Candidates
                // only price lengths beyond what earlier candidates already covered.
                let mut prev_max_len = min_match_len - 1;
                for candidate in candidates.iter() {
                    let max_match_len = candidate.match_len.min(frontier_limit);
                    if max_match_len < min_match_len {
                        continue;
                    }
                    let start_len = (prev_max_len + 1).max(min_match_len);
                    if start_len > max_match_len {
                        prev_max_len = prev_max_len.max(max_match_len);
                        continue;
                    }
                    if max_match_len > last_pos {
                        BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
                    }
                    let off_base = BtMatcher::encode_offset_base_with_reps(
                        candidate.offset as u32,
                        initial_litlen,
                        initial_reps,
                    );
                    let off_price = profile
                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
                    debug_assert!(max_match_len < frontier_buffer_size);
                    let nodes0_price = nodes[0].price;
                    for match_len in (start_len..=max_match_len).rev() {
                        let ml_price = BtMatcher::cached_match_length_price(
                            profile,
                            $stats,
                            match_len,
                            &mut ml_cache,
                            ml_price_stamp,
                        );
                        let seq_cost = BtMatcher::add_prices(
                            ll0_price,
                            profile.match_price_from_parts(off_price, ml_price, $stats),
                        );
                        let next_cost = BtMatcher::add_prices(nodes0_price, seq_cost);
                        // SAFETY: NOTE(review) - `match_len <= max_match_len <=
                        // frontier_limit`; presumes `nodes.len() > frontier_limit`
                        // (i.e. `HC_OPT_NODE_LEN > HC_OPT_NUM - 1`, not visible here).
                        let node_price = unsafe { nodes.get_unchecked(match_len).price };
                        if match_len > last_pos || next_cost < node_price {
                            // SAFETY: same index bound as the read above.
                            let slot = unsafe { nodes.get_unchecked_mut(match_len) };
                            *slot = HcOptimalNode {
                                price: next_cost,
                                off: candidate.offset as u32,
                                mlen: match_len as u32,
                                litlen: 0,
                                reps: initial_reps,
                            };
                            if match_len > last_pos {
                                last_pos = match_len;
                            }
                        } else if abort_on_worse_match {
                            break;
                        }
                    }
                    prev_max_len = prev_max_len.max(max_match_len);
                }
                // Sentinel: the node just past the frontier is unreachable.
                if last_pos + 1 < frontier_buffer_size {
                    nodes[last_pos + 1].price = u32::MAX;
                }
            }
        }

        // ---- Forward pass over positions 1..=last_pos ----
        while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
            debug_assert!(pos + 1 < frontier_buffer_size);
            // SAFETY: NOTE(review) - all `get_unchecked*` accesses in this loop use
            // indices `<= pos + 1 < frontier_buffer_size` or positions derived by
            // subtracting a match length from `pos`; presumes
            // `nodes.len() >= frontier_buffer_size`.
            let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
            if prev_node.price != u32::MAX {
                // Option 1: reach `pos` by appending one literal to `pos - 1`.
                let lit_len = prev_node.litlen as usize + 1;
                let lit_price = {
                    let bt = $self.backend.bt_mut();
                    BtMatcher::cached_literal_price(
                        profile,
                        $stats,
                        $current[pos - 1],
                        &mut bt.opt_lit_price_scratch,
                        &mut bt.opt_lit_price_generation,
                        lit_price_stamp,
                    )
                };
                let ll_delta = BtMatcher::cached_lit_length_delta_price(
                    profile,
                    $stats,
                    lit_len,
                    &mut ll_cache,
                    ll_price_stamp,
                );
                let lit_cost = BtMatcher::add_price_delta(prev_node.price, lit_price, ll_delta);
                let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
                if lit_cost <= node_pos_price {
                    // Keep a copy of the state being replaced: it may still be the
                    // better predecessor for `pos + 1` (see below).
                    let prev_match = unsafe { *nodes.get_unchecked(pos) };
                    let slot = unsafe { nodes.get_unchecked_mut(pos) };
                    *slot = prev_node;
                    slot.litlen = lit_len as u32;
                    slot.price = lit_cost;
                    #[allow(clippy::collapsible_if)]
                    if opt_level
                        && prev_match.mlen > 0
                        && prev_match.litlen == 0
                        && pos < $current_len
                    {
                        // Only worthwhile when a 1-literal run is priced below a
                        // 0-literal run.
                        if ll1_price < ll0_price {
                            let next_lit_price = {
                                let bt = $self.backend.bt_mut();
                                BtMatcher::cached_literal_price(
                                    profile,
                                    $stats,
                                    $current[pos],
                                    &mut bt.opt_lit_price_scratch,
                                    &mut bt.opt_lit_price_generation,
                                    lit_price_stamp,
                                )
                            };
                            // Cost of "replaced match, then exactly one literal".
                            let with1literal = BtMatcher::add_price_delta(
                                prev_match.price,
                                next_lit_price,
                                ll1_price as i32 - ll0_price as i32,
                            );
                            let ll_delta_next = BtMatcher::cached_lit_length_delta_price(
                                profile,
                                $stats,
                                lit_len + 1,
                                &mut ll_cache,
                                ll_price_stamp,
                            );
                            // Cost of extending the literal run by one more byte.
                            let with_more_literals =
                                BtMatcher::add_price_delta(lit_cost, next_lit_price, ll_delta_next);
                            let next = pos + 1;
                            let next_price = unsafe { nodes.get_unchecked(next).price };
                            if with1literal < with_more_literals && with1literal < next_price {
                                debug_assert!(pos >= prev_match.mlen as usize);
                                let prev_pos = pos - prev_match.mlen as usize;
                                {
                                    let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
                                    let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
                                        prev_match.off,
                                        prev_state.litlen as usize,
                                        prev_state.reps,
                                    );
                                    let slot = unsafe { nodes.get_unchecked_mut(next) };
                                    *slot = prev_match;
                                    slot.reps = reps_after_match;
                                    slot.litlen = 1;
                                    slot.price = with1literal;
                                    if next > last_pos {
                                        last_pos = next;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            let base_cost = unsafe { nodes.get_unchecked(pos).price };
            // Unreachable position: nothing to extend from here.
            if base_cost == u32::MAX {
                pos += 1;
                continue;
            }
            {
                // A node that ends in a match still carries the repeat offsets from
                // before that match; apply the match to them now.
                let base_node = unsafe { *nodes.get_unchecked(pos) };
                if base_node.mlen > 0 && base_node.litlen == 0 {
                    debug_assert!(pos >= base_node.mlen as usize);
                    let prev_pos = pos - base_node.mlen as usize;
                    let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
                    let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
                        base_node.off,
                        prev_state.litlen as usize,
                        prev_state.reps,
                    );
                    unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
                }
            }

            // No match search within the last 8 bytes of the region.
            if pos + 8 > $current_len {
                pos += 1;
                continue;
            }

            // Reached the frontier: the shortest path ends here.
            if pos == last_pos {
                break;
            }

            let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
            // Fast strategies skip the search when the next position is already
            // at most half a bit-cost unit more expensive than this one.
            if abort_on_worse_match
                && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
            {
                pos += 1;
                continue;
            }

            let abs_pos = $current_abs_start + pos;
            let ldm_candidate = if has_ldm {
                $self.backend.bt_mut().ldm_process_match_candidate(
                    &mut opt_ldm,
                    pos,
                    $current_len - pos,
                    min_match_len,
                )
            } else {
                None
            };
            candidates.clear();
            // SAFETY: NOTE(review) - the contract of `$collect` is not visible here;
            // the `get_unchecked(pos)` reads follow the loop-wide index bound.
            unsafe {
                $self.$collect::<$strategy_ty, true>(
                    abs_pos,
                    current_abs_end,
                    profile,
                    HcCandidateQuery {
                        reps: nodes.get_unchecked(pos).reps,
                        lit_len: nodes.get_unchecked(pos).litlen as usize,
                        ldm_candidate,
                    },
                    &mut candidates,
                )
            };
            let base_reps = unsafe { nodes.get_unchecked(pos).reps };
            let base_litlen = unsafe { nodes.get_unchecked(pos).litlen as usize };
            if let Some(candidate) = candidates.last() {
                let longest_len = candidate.match_len.min($current_len - pos);
                // A match that is long enough, or that reaches the node limit or the
                // end of the region, terminates the forward pass right away.
                if longest_len > sufficient_len
                    || pos + longest_len >= HC_OPT_NUM
                    || pos + longest_len >= $current_len
                {
                    let lit_len = base_litlen;
                    let off_base = BtMatcher::encode_offset_base_with_reps(
                        candidate.offset as u32,
                        lit_len,
                        base_reps,
                    );
                    let off_price = profile
                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
                    let ml_price = BtMatcher::cached_match_length_price(
                        profile,
                        $stats,
                        longest_len,
                        &mut ml_cache,
                        ml_price_stamp,
                    );
                    let seq_cost = BtMatcher::add_prices(
                        ll0_price,
                        profile.match_price_from_parts(off_price, ml_price, $stats),
                    );
                    let forced_price = BtMatcher::add_prices(base_cost, seq_cost);
                    let end_pos = (pos + longest_len).min($current_len);
                    forced_end = Some(end_pos);
                    forced_end_state = Some(HcOptimalNode {
                        price: forced_price,
                        off: candidate.offset as u32,
                        mlen: longest_len as u32,
                        litlen: 0,
                        reps: base_reps,
                    });
                    break;
                }
            }
            // Relax every end position reachable from `pos` with each candidate.
            let mut prev_max_len = min_match_len - 1;
            for candidate in candidates.iter() {
                debug_assert!(pos <= frontier_limit);
                let max_match_len = candidate
                    .match_len
                    .min($current_len - pos)
                    .min(frontier_limit - pos);
                let min_len = min_match_len;
                if max_match_len < min_len {
                    continue;
                }
                let start_len = (prev_max_len + 1).max(min_len);
                if start_len > max_match_len {
                    prev_max_len = prev_max_len.max(max_match_len);
                    continue;
                }
                let max_next = pos + max_match_len;
                if max_next > last_pos {
                    BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
                }
                let lit_len = base_litlen;
                let off_base = BtMatcher::encode_offset_base_with_reps(
                    candidate.offset as u32,
                    lit_len,
                    base_reps,
                );
                let off_price = profile
                    .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
                debug_assert!(pos + max_match_len < frontier_buffer_size);
                for match_len in (start_len..=max_match_len).rev() {
                    let next = pos + match_len;
                    let ml_price = BtMatcher::cached_match_length_price(
                        profile,
                        $stats,
                        match_len,
                        &mut ml_cache,
                        ml_price_stamp,
                    );
                    let seq_cost = BtMatcher::add_prices(
                        ll0_price,
                        profile.match_price_from_parts(off_price, ml_price, $stats),
                    );
                    let next_cost = BtMatcher::add_prices(base_cost, seq_cost);
                    // SAFETY: `next <= pos + max_match_len < frontier_buffer_size`
                    // (asserted above in debug builds).
                    let node_next_price = unsafe { nodes.get_unchecked(next).price };
                    let improved = next > last_pos || next_cost < node_next_price;
                    if improved {
                        let slot = unsafe { nodes.get_unchecked_mut(next) };
                        *slot = HcOptimalNode {
                            price: next_cost,
                            off: candidate.offset as u32,
                            mlen: match_len as u32,
                            litlen: 0,
                            reps: base_reps,
                        };
                        if next > last_pos {
                            last_pos = next;
                        }
                    } else if abort_on_worse_match {
                        break;
                    }
                }
                prev_max_len = prev_max_len.max(max_match_len);
            }

            // Sentinel: the node just past the frontier is unreachable.
            if last_pos + 1 < frontier_buffer_size {
                unsafe {
                    nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
                }
            }
            pos += 1;
        }

        // ---- No match found at all ----
        if last_pos == 0 {
            if $current_len == 0 {
                let price = nodes[0].price;
                return $self.backend.bt_mut().finish_optimal_plan(
                    HcOptimalPlanBuffers {
                        nodes,
                        candidates,
                        store,
                        price_arena,
                    },
                    (price, initial_reps, initial_litlen, 0),
                );
            }
            // Advance by a single literal and report its cost.
            let lit_price = {
                let bt = $self.backend.bt_mut();
                BtMatcher::cached_literal_price(
                    profile,
                    $stats,
                    $current[0],
                    &mut bt.opt_lit_price_scratch,
                    &mut bt.opt_lit_price_generation,
                    lit_price_stamp,
                )
            };
            let next_litlen = initial_litlen
                .checked_add(1)
                .expect("optimal parser next litlen out of usize range");
            let ll_delta = BtMatcher::cached_lit_length_delta_price(
                profile,
                $stats,
                next_litlen,
                &mut ll_cache,
                ll_price_stamp,
            );
            let price = BtMatcher::add_price_delta(nodes[0].price, lit_price, ll_delta);
            return $self.backend.bt_mut().finish_optimal_plan(
                HcOptimalPlanBuffers {
                    nodes,
                    candidates,
                    store,
                    price_arena,
                },
                (price, initial_reps, next_litlen, 1),
            );
        }

        // ---- Pick the final state of the shortest path ----
        let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
        let last_stretch = if let Some(forced_state) = forced_end_state {
            forced_state
        } else {
            nodes[target_pos]
        };
        // Final node unreachable: report the whole region with the initial state.
        if last_stretch.price == u32::MAX {
            return $self.backend.bt_mut().finish_optimal_plan(
                HcOptimalPlanBuffers {
                    nodes,
                    candidates,
                    store,
                    price_arena,
                },
                (u32::MAX, initial_reps, initial_litlen, $current_len),
            );
        }

        // Final node carries no match: nothing to emit, only literals.
        if last_stretch.mlen == 0 {
            return $self.backend.bt_mut().finish_optimal_plan(
                HcOptimalPlanBuffers {
                    nodes,
                    candidates,
                    store,
                    price_arena,
                },
                (
                    last_stretch.price,
                    last_stretch.reps,
                    last_stretch.litlen as usize,
                    target_pos.min($current_len),
                ),
            );
        }

        // `cur` is the node position just before the final match (and before its
        // trailing literals, when there are any).
        let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
        let end_reps = if last_stretch.litlen == 0 {
            // Path ends on a match: apply it to the predecessor's repeat offsets.
            let prev_state = nodes[cur];
            let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
                last_stretch.off,
                prev_state.litlen as usize,
                prev_state.reps,
            );
            reps_after_match
        } else {
            let tail_literals = last_stretch.litlen as usize;
            // Inconsistent state guard: bail out instead of underflowing `cur`.
            if cur < tail_literals {
                return $self.backend.bt_mut().finish_optimal_plan(
                    HcOptimalPlanBuffers {
                        nodes,
                        candidates,
                        store,
                        price_arena,
                    },
                    (
                        last_stretch.price,
                        last_stretch.reps,
                        tail_literals,
                        target_pos.min($current_len),
                    ),
                );
            }
            cur -= tail_literals;
            last_stretch.reps
        };

        // ---- Reverse traversal: copy the chosen stretches into `store` ----
        let store_end = cur + 2;
        if store.len() <= store_end {
            store.resize(store_end + 1, HcOptimalNode::default());
        }
        let mut store_start;
        let mut stretch_pos = cur;

        // NOTE(review): the writes to `store[store_end]` and `store_start` made in
        // this branch are overwritten by the two unconditional statements that
        // follow it. This appears to mirror the shape of the reference zstd
        // shortest-path code; confirm it is intentional before simplifying.
        if last_stretch.litlen > 0 {
            store[store_end] = HcOptimalNode {
                litlen: last_stretch.litlen,
                mlen: 0,
                ..HcOptimalNode::default()
            };
            store_start = store_end.saturating_sub(1);
            store[store_start] = last_stretch;
        }
        store[store_end] = last_stretch;
        store_start = store_end;

        loop {
            let next_stretch = nodes[stretch_pos];
            // The literal run preceding a stored match lives in the predecessor node.
            store[store_start].litlen = next_stretch.litlen;
            if next_stretch.mlen == 0 {
                break;
            }
            if store_start == 0 {
                break;
            }
            store_start -= 1;
            store[store_start] = next_stretch;
            let litlen = next_stretch.litlen as usize;
            let mlen = next_stretch.mlen as usize;
            debug_assert!(litlen + mlen <= $current_len);
            let step = litlen + mlen;
            // Stop rather than step before the start of the node array.
            if step == 0 || stretch_pos < step {
                break;
            }
            stretch_pos -= step;
        }

        // ---- Emit the stored stretches in forward order ----
        let mut tail_literals = initial_litlen;
        let mut store_pos = store_start;
        while store_pos <= store_end {
            let stretch = store[store_pos];
            let llen = stretch.litlen as usize;
            let mlen = stretch.mlen as usize;
            if mlen == 0 {
                // Literal-only entry: remember it as the trailing literal run.
                tail_literals = llen;
                store_pos += 1;
                continue;
            }
            $out.push(HcOptimalSequence {
                offset: stretch.off,
                match_len: mlen as u32,
                lit_len: llen as u32,
            });
            tail_literals = 0;
            store_pos += 1;
        }
        let result = (
            last_stretch.price,
            end_reps,
            if last_stretch.litlen > 0 {
                last_stretch.litlen as usize
            } else {
                tail_literals
            },
            target_pos.min($current_len),
        );
        $self.backend.bt_mut().finish_optimal_plan(
            HcOptimalPlanBuffers {
                nodes,
                candidates,
                store,
                price_arena,
            },
            result,
        )
    }};
}
4453
/// Body of the candidate collector used by the optimal and btlazy2 parsers.
///
/// Fills `$out` (cleared first) with match candidates for `$abs_pos`, in this
/// order: repeat offsets, the optional 3-byte hash probe, then either the
/// binary-tree finder or a hash-chain walk, and finally the optional
/// long-distance-match candidate from `$query`. Every candidate goes through
/// `BtMatcher::push_candidate_ladder`, which decides whether it is kept.
///
/// Macro parameters:
/// * `$self` - the matcher (`$self.table` and `$self.hc` are used).
/// * `$strategy_ty` - strategy type; only `USE_HASH3` is read.
/// * `$abs_pos` / `$current_abs_end` - absolute search position and block end.
/// * `$profile` - cost profile (`sufficient_match_len`, `max_chain_depth`).
/// * `$query` - provides `reps`, `lit_len` and `ldm_candidate`.
/// * `$out` - candidate output buffer.
/// * `$bt_matchfinder` - boolean selecting the binary-tree path.
/// * `$bt_update`, `$bt_insert`, `$hash3` - names of (unsafe) `$self.table`
///   methods; `$for_each_rep` - name of an (unsafe) `$self.hc` method.
/// * `$cpl` - path of the (unsafe) common-prefix-length function, called as
///   `$cpl(candidate_ptr, current_ptr, limit)`.
///
/// The expansion uses `return` to leave the *enclosing function* early, so it
/// must expand inside a function returning `()`.
macro_rules! collect_optimal_candidates_initialized_body {
    (
        $self:expr,
        $strategy_ty:ty,
        $abs_pos:ident,
        $current_abs_end:ident,
        $profile:ident,
        $query:ident,
        $out:ident,
        $bt_matchfinder:ident,
        $bt_update:ident,
        $bt_insert:ident,
        $for_each_rep:ident,
        $hash3:ident,
        $cpl:path $(,)?
    ) => {{
        let use_hash3: bool = <$strategy_ty as super::strategy::Strategy>::USE_HASH3;
        // "Initialized" means the caller has already set up every table.
        debug_assert!(!$self.table.hash_table.is_empty());
        debug_assert!($self.table.hash3_log == 0 || !$self.table.hash3_table.is_empty());
        debug_assert!(
            !use_hash3 || $self.table.hash3_log != 0,
            "Strategy::USE_HASH3 = true but runtime hash3_log is 0 — call configure() first",
        );
        debug_assert!(!$self.table.chain_table.is_empty());
        let min_match_len = HC_OPT_MIN_MATCH_LEN;
        let reps = $query.reps;
        let lit_len = $query.lit_len;
        let ldm_candidate = $query.ldm_candidate;
        $out.clear();
        // Position lies inside a region marked as "skip": offer only the LDM
        // candidate, if any.
        if $abs_pos < $self.table.skip_insert_until_abs {
            if let Some(ldm) = ldm_candidate {
                let mut best_len_for_skip = 0usize;
                let _ = super::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    ldm,
                    min_match_len,
                );
            }
            return;
        }
        if $bt_matchfinder {
            // SAFETY: NOTE(review) - the contract of `$bt_update` is not visible here.
            unsafe { $self.table.$bt_update($abs_pos, $current_abs_end) };
        }
        let current_idx = $abs_pos - $self.table.history_abs_start;
        // Fewer than 4 bytes remain in the history buffer: again only the LDM
        // candidate can be offered.
        if current_idx + 4 > $self.table.live_history().len() {
            if let Some(ldm) = ldm_candidate {
                let mut best_len_for_skip = 0usize;
                let _ = super::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    ldm,
                    min_match_len,
                );
            }
            return;
        }
        let mut best_len_for_skip = 0usize;
        let mut skip_further_match_search = false;
        let mut rep_len_candidate_found = false;

        // ---- Step 1: repeat offsets ----
        // SAFETY: NOTE(review) - the contract of `$for_each_rep` is not visible here.
        unsafe {
            $self.hc.$for_each_rep(
                &$self.table,
                $abs_pos,
                lit_len,
                reps,
                $current_abs_end,
                min_match_len,
                |rep| {
                    if rep.match_len >= min_match_len {
                        rep_len_candidate_found = true;
                    }
                    let _ = super::bt::BtMatcher::push_candidate_ladder(
                        $out,
                        &mut best_len_for_skip,
                        rep,
                        min_match_len,
                    );
                    // A long-enough repeat match makes any further search pointless.
                    if rep.match_len > $profile.sufficient_match_len {
                        skip_further_match_search = true;
                    }
                    // So does a repeat match that reaches the end of the block.
                    if $abs_pos + rep.match_len >= $current_abs_end {
                        skip_further_match_search = true;
                    }
                },
            )
        };

        // ---- Step 2: 3-byte hash probe (only when nothing usable was found) ----
        if use_hash3 && !skip_further_match_search && best_len_for_skip < min_match_len {
            $self.table.update_hash3_until($abs_pos);
            // SAFETY: NOTE(review) - the contract of `$hash3` is not visible here.
            if let Some(h3) = unsafe {
                $self
                    .table
                    .$hash3($abs_pos, $current_abs_end, min_match_len)
            } {
                let _ = super::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    h3,
                    min_match_len,
                );
                if !rep_len_candidate_found
                    && (h3.match_len > $profile.sufficient_match_len
                        || $abs_pos + h3.match_len >= $current_abs_end)
                {
                    $self.table.skip_insert_until_abs = $abs_pos + 1;
                    skip_further_match_search = true;
                }
            }
        }

        // ---- Step 3: main match finder ----
        if !skip_further_match_search && $bt_matchfinder {
            // SAFETY: NOTE(review) - the contract of `$bt_insert` is not visible here.
            unsafe {
                $self.table.$bt_insert(
                    $abs_pos,
                    $current_abs_end,
                    $profile,
                    min_match_len,
                    &mut best_len_for_skip,
                    $out,
                )
            };
        } else if !skip_further_match_search {
            // Hash-chain fallback: insert the position, then walk its chain.
            $self.table.insert_position($abs_pos);
            let max_chain_depth = $profile.max_chain_depth.min($self.hc.search_depth);
            let concat = $self.table.live_history();
            // Furthest match end seen so far; the `+ 9` pairs with the `- 8` at the
            // bottom so the skip marker is at least `$abs_pos + 1`.
            let mut match_end_abs = $abs_pos + 9;
            if max_chain_depth > 0 {
                for (visited, candidate_abs) in $self
                    .hc
                    .chain_candidates(&$self.table, $abs_pos)
                    .into_iter()
                    .enumerate()
                {
                    if visited >= max_chain_depth {
                        break;
                    }
                    // `usize::MAX` marks the end of the chain.
                    if candidate_abs == usize::MAX {
                        break;
                    }
                    // Ignore candidates outside the window or not strictly behind us.
                    if candidate_abs < $self.table.window_low_abs_for_target($abs_pos)
                        || candidate_abs >= $abs_pos
                    {
                        continue;
                    }
                    let candidate_idx = candidate_abs - $self.table.history_abs_start;
                    debug_assert!(
                        $abs_pos <= $current_abs_end,
                        "HC chain walker called past current block end"
                    );
                    let tail_limit = $current_abs_end - $abs_pos;
                    let base = concat.as_ptr();
                    // SAFETY: NOTE(review) - `candidate_idx < current_idx` and
                    // `current_idx + 4 <= concat.len()` were checked above; this
                    // presumes `current_idx + tail_limit <= concat.len()` and that
                    // `$cpl` reads at most `tail_limit` bytes from each pointer.
                    let match_len =
                        unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), tail_limit) };
                    if match_len < min_match_len {
                        continue;
                    }
                    let offset = $abs_pos - candidate_abs;
                    // Track the match end only for candidates the ladder accepted.
                    if super::bt::BtMatcher::push_candidate_ladder(
                        $out,
                        &mut best_len_for_skip,
                        MatchCandidate {
                            start: $abs_pos,
                            offset,
                            match_len,
                        },
                        min_match_len,
                    ) {
                        let candidate_end = candidate_abs + match_len;
                        if candidate_end > match_end_abs {
                            match_end_abs = candidate_end;
                        }
                    }
                    // Stop once a match is longer than the planner can use or
                    // reaches the end of the block.
                    if match_len > HC_OPT_NUM || $abs_pos + match_len >= $current_abs_end {
                        break;
                    }
                }
            }
            // Only ever move the skip marker forward.
            $self.table.skip_insert_until_abs =
                $self.table.skip_insert_until_abs.max(match_end_abs - 8);
        }

        // ---- Step 4: long-distance-match candidate, offered last ----
        if let Some(ldm) = ldm_candidate {
            let _ = super::bt::BtMatcher::push_candidate_ladder(
                $out,
                &mut best_len_for_skip,
                ldm,
                min_match_len,
            );
        }
    }};
}
4682
/// Shared body of the hash3 candidate probe.
///
/// Must be expanded inside a function returning `Option<MatchCandidate>`: the
/// body bails out with `return None` / `?`.
///
/// The probe reads the single `hash3_table` slot for `$abs_pos`, rejects the
/// stored position when it falls outside the live history, is not strictly
/// before `$abs_pos`, or is `HC3_MAX_OFFSET` or more bytes away, then measures
/// the common prefix with `$cpl`. A candidate is produced only when that
/// length reaches `$min_match_len`.
macro_rules! hash3_candidate_body {
    (
        $table:expr,
        $abs_pos:ident,
        $current_abs_end:ident,
        $min_match_len:ident,
        $cpl:path $(,)?
    ) => {{
        // A zero `hash3_log` means there is nothing to probe.
        if $table.hash3_log == 0 {
            return None;
        }
        let cur_idx = $abs_pos.checked_sub($table.history_abs_start)?;
        let history = $table.live_history();
        // Four bytes must be available at the probe position.
        if history.len() < cur_idx + 4 {
            return None;
        }
        let slot = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
            history,
            cur_idx,
            $table.hash3_log,
            3,
        );
        // An out-of-range slot index is treated like an empty slot.
        let stored = match $table.hash3_table.get(slot) {
            Some(&value) => value,
            None => $crate::encoding::match_table::storage::HC_EMPTY,
        };
        let cand_abs =
            $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
                stored,
                $table.position_base,
                $table.index_shift,
            )?;
        // The candidate has to sit inside the live history and strictly
        // before the probe position.
        if cand_abs >= $abs_pos || cand_abs < $table.history_abs_start {
            return None;
        }
        let distance = $abs_pos - cand_abs;
        if distance >= $crate::encoding::bt::HC3_MAX_OFFSET {
            return None;
        }
        let cand_idx = cand_abs - $table.history_abs_start;
        let max_len = $current_abs_end.saturating_sub($abs_pos);
        let data = history.as_ptr();
        // SAFETY: `cand_idx < cur_idx` and `cur_idx + 4 <= history.len()` were
        // established above, so both start pointers are in bounds.
        // NOTE(review): assumes `$cpl` reads at most `max_len` bytes from each
        // pointer and that `$current_abs_end` does not extend past the live
        // history; confirm against callers.
        let len = unsafe { $cpl(data.add(cand_idx), data.add(cur_idx), max_len) };
        if len < $min_match_len {
            None
        } else {
            Some($crate::encoding::opt::types::MatchCandidate {
                start: $abs_pos,
                offset: distance,
                match_len: len,
            })
        }
    }};
}
4740pub(crate) use hash3_candidate_body;
4741
/// Shared body that probes the three repeat offsets at `$abs_pos` and calls
/// `$f` once per repeat offset that yields a match of at least
/// `$min_match_len` bytes.
///
/// Must be expanded inside a function returning `()` (it uses a bare
/// `return;`), with `MatchCandidate` in scope at the expansion site.
///
/// Repeat offsets tried, in order:
/// * `$lit_len == 0`: `reps[1]`, `reps[2]`, then `reps[0] - 1` (only when
///   `reps[0] > 1`);
/// * otherwise: `reps[0]`, `reps[1]`, `reps[2]`.
///
/// Offsets that are zero, exceed `$abs_pos`, or point before the live history
/// are skipped. A cheap 4-byte (or 3-byte when `$min_match_len == 3`) equality
/// gate runs before the full prefix count done by `$cpl`.
macro_rules! for_each_repcode_candidate_body {
    (
        $table:expr,
        $abs_pos:ident,
        $lit_len:ident,
        $reps:ident,
        $current_abs_end:ident,
        $min_match_len:ident,
        $f:ident,
        $cpl:path $(,)?
    ) => {{
        let rep_offsets: [Option<usize>; 3] = if $lit_len == 0 {
            [
                Some($reps[1] as usize),
                Some($reps[2] as usize),
                // Lazy `then` instead of `then_some`: `then_some` evaluates
                // its argument eagerly, so `reps[0] - 1` would overflow (and
                // panic in debug builds) when `reps[0] == 0`, even though the
                // guard is false.
                ($reps[0] > 1).then(|| ($reps[0] - 1) as usize),
            ]
        } else {
            [
                Some($reps[0] as usize),
                Some($reps[1] as usize),
                Some($reps[2] as usize),
            ]
        };
        let concat = $table.live_history();
        let current_idx = $abs_pos - $table.history_abs_start;
        // The gate below reads four bytes at the current position.
        if current_idx + 4 > concat.len() {
            return;
        }
        let tail_limit = $current_abs_end.saturating_sub($abs_pos);
        let base = concat.as_ptr();
        let concat_len = concat.len();
        for rep in rep_offsets.iter().flatten().copied() {
            if rep == 0 || rep > $abs_pos {
                continue;
            }
            let candidate_pos = $abs_pos - rep;
            if candidate_pos < $table.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - $table.history_abs_start;
            // SAFETY: `current_idx + 4 <= concat_len` was checked above and
            // `candidate_idx < current_idx` because `rep >= 1`, so both
            // unaligned 4-byte reads stay inside `concat`.
            let gate_matches = unsafe {
                let cand = base.add(candidate_idx).cast::<u32>().read_unaligned();
                let cur = base.add(current_idx).cast::<u32>().read_unaligned();
                if $min_match_len == 3 {
                    // `to_le` puts the first byte in memory into the low byte
                    // on every endianness, so the mask keeps the first three
                    // bytes.
                    (cand.to_le() & 0x00FF_FFFF) == (cur.to_le() & 0x00FF_FFFF)
                } else {
                    cand == cur
                }
            };
            if !gate_matches {
                continue;
            }
            // Never compare past the end of the history from either side, nor
            // past the end of the current block.
            let max = (concat_len - candidate_idx)
                .min(concat_len - current_idx)
                .min(tail_limit);
            // SAFETY: both pointers are in bounds and `max` is clamped to the
            // bytes remaining after each of them.
            // NOTE(review): assumes `$cpl` reads at most `max` bytes.
            let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), max) };
            if match_len < $min_match_len {
                continue;
            }
            $f(MatchCandidate {
                start: $abs_pos,
                offset: rep,
                match_len,
            });
        }
    }};
}
4836pub(crate) use for_each_repcode_candidate_body;
4837
/// Shared body that inserts `$abs_pos` into the pair-slot tree stored in
/// `chain_table` while collecting match candidates into `$out`.
///
/// Must be expanded inside a function returning `()` (it uses bare
/// `return;`). `$cmf` is the match-length kernel, called as
/// `$cmf(concat, current_idx, candidate_idx, tail_limit, seed_len)`.
///
/// Steps, in order:
/// 1. hash the current position and (on x86 with SSE) prefetch the hash slots
///    for this and the next position;
/// 2. replace `hash_table[hash]` with the current position's stored index and
///    walk down from the previous occupant, re-linking the two slots owned by
///    the current position as it goes;
/// 3. if `$table.dms.table()` yields a table, walk it read-only as well;
/// 4. set `skip_insert_until_abs` from the furthest match end observed.
macro_rules! bt_insert_and_collect_matches_body {
    (
        $table:expr,
        $search_depth:expr,
        $abs_pos:ident,
        $current_abs_end:ident,
        $profile:ident,
        $min_match_len:ident,
        $best_len_for_skip:ident,
        $out:ident,
        $cmf:path $(,)?
    ) => {{
        // Index of the current position inside the live history slice.
        let idx = $abs_pos - $table.history_abs_start;
        // Rebuild the slice from its raw parts.
        // NOTE(review): presumably done to detach the slice's lifetime from
        // the borrow of `$table`, which is mutated further down; this is only
        // sound if the live-history storage is not moved or freed inside this
        // body. Confirm.
        let concat: &[u8] = unsafe {
            let lh = $table.live_history();
            core::slice::from_raw_parts(lh.as_ptr(), lh.len())
        };
        // Bail out unless eight bytes are available at the current position.
        if idx + 8 > concat.len() {
            return;
        }
        debug_assert!(
            $abs_pos <= $current_abs_end,
            "BT collect called past current block end"
        );
        // Upper bound on any match length reported from this position.
        let tail_limit = $current_abs_end - $abs_pos;
        let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
            concat,
            idx,
            $table.hash_log,
            $table.search_mls,
        );
        // Prefetch hints only; compiled in solely on x86/x86_64 with SSE.
        #[cfg(all(
            target_feature = "sse",
            any(target_arch = "x86", target_arch = "x86_64")
        ))]
        {
            #[cfg(target_arch = "x86")]
            use core::arch::x86::{_MM_HINT_T0, _mm_prefetch};
            #[cfg(target_arch = "x86_64")]
            use core::arch::x86_64::{_MM_HINT_T0, _mm_prefetch};
            // NOTE(review): assumes `hash` is a valid index into `hash_table`
            // so that `.add(hash)` stays in bounds; the same index is used
            // for a checked read further down.
            unsafe {
                _mm_prefetch($table.hash_table.as_ptr().add(hash).cast(), _MM_HINT_T0);
            }
            // Also prefetch the slot for the next position when it can be hashed.
            if idx + 1 + 8 <= concat.len() {
                let hash_next =
                    $crate::encoding::match_table::storage::MatchTable::hash_position_at(
                        concat,
                        idx + 1,
                        $table.hash_log,
                        $table.search_mls,
                    );
                unsafe {
                    _mm_prefetch(
                        $table.hash_table.as_ptr().add(hash_next).cast(),
                        _MM_HINT_T0,
                    );
                }
            }
        }
        let Some(relative_pos) = $table.relative_position($abs_pos) else {
            return;
        };
        // Table entries hold `relative position + 1`; the walk below undoes
        // the `+ 1` when decoding `match_stored`.
        let stored = relative_pos + 1;
        let bt_mask = $table.bt_mask();
        let chain_ptr = $table.chain_table.as_mut_ptr();
        debug_assert_eq!($table.chain_table.len(), 2 << $table.bt_log());
        // Candidates at or below this absolute position end the descent (see
        // the `candidate_abs <= bt_low` checks in the walk).
        let bt_low = $abs_pos.saturating_sub(bt_mask);
        let window_low = $table.window_low_abs_for_target($abs_pos);
        // With wrapping arithmetic, `match_stored + win_off` equals
        // `candidate_abs - window_low`, so a single unsigned compare against
        // `win_range` tests `window_low <= candidate_abs < $abs_pos`.
        let win_off = $table
            .position_base
            .wrapping_sub(1)
            .wrapping_sub($table.index_shift)
            .wrapping_sub(window_low);
        let win_range = $abs_pos - window_low;
        // Starts at `$abs_pos + 9` so that the final `match_end_abs - 8`
        // is at least `$abs_pos + 1`.
        let mut match_end_abs = $abs_pos + 9;
        let mut compares_left = $profile.max_chain_depth.min($search_depth);
        // Match lengths recorded on the most recent "smaller" / "larger"
        // branch; their minimum seeds the next comparison.
        let mut common_length_smaller = 0usize;
        let mut common_length_larger = 0usize;
        // The current position owns two adjacent chain_table slots.
        let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
        let mut smaller_slot = pair_idx;
        let mut larger_slot = pair_idx + 1;
        // Take the previous occupant of the hash bucket and install ourselves.
        let mut match_stored = $table.hash_table[hash];
        $table.hash_table[hash] = stored;
        debug_assert!(
            $min_match_len >= $crate::encoding::cost_model::HC_FORMAT_MINMATCH,
            "min_match_len must be at least HC_FORMAT_MINMATCH"
        );
        // Only matches strictly longer than `best_len` are offered to the ladder.
        let mut best_len = (*$best_len_for_skip).max($min_match_len - 1);

        while compares_left > 0 && (match_stored as usize).wrapping_add(win_off) < win_range {
            compares_left -= 1;
            // Decode the stored index back into an absolute position.
            let candidate_abs = ($table.position_base + (match_stored as usize - 1))
                .wrapping_sub($table.index_shift);

            // Read the candidate's two slots up front: one of them is the next
            // node to visit and the current node may be re-linked below.
            let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
            // NOTE(review): assumes `bt_pair_index_for_abs` returns an even
            // index with `index + 1 < chain_table.len()`.
            let next_smaller = unsafe { *chain_ptr.add(next_pair_idx) };
            let next_larger = unsafe { *chain_ptr.add(next_pair_idx + 1) };
            let seed_len = common_length_smaller.min(common_length_larger);
            let candidate_idx = candidate_abs - $table.history_abs_start;
            // NOTE(review): `$cmf` is presumably allowed to skip the first
            // `seed_len` bytes and must not read past `tail_limit`; confirm
            // against the kernel's contract.
            let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };

            if match_len > best_len {
                let offset = $abs_pos - candidate_abs;
                let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    $best_len_for_skip,
                    $crate::encoding::opt::types::MatchCandidate {
                        start: $abs_pos,
                        offset,
                        match_len,
                    },
                    $min_match_len,
                );
                if accepted {
                    best_len = match_len;
                    // Track the furthest end among accepted candidates.
                    let candidate_end = candidate_abs + match_len;
                    if candidate_end > match_end_abs {
                        match_end_abs = candidate_end;
                    }
                    // Stop once the match reaches the block end or exceeds
                    // HC_OPT_NUM.
                    if match_len >= tail_limit
                        || match_len > $crate::encoding::cost_model::HC_OPT_NUM
                    {
                        break;
                    }
                }
            }

            // A match reaching the block end leaves no following byte to
            // compare, so the ordering below cannot be decided.
            if match_len >= tail_limit {
                break;
            }

            let candidate_next = candidate_idx + match_len;
            let current_next = idx + match_len;
            // First differing byte decides which side the candidate goes on.
            // NOTE(review): in-bounds because `match_len < tail_limit` here,
            // provided `$current_abs_end` lies within `concat`.
            if unsafe {
                *concat.get_unchecked(candidate_next) < *concat.get_unchecked(current_next)
            } {
                // Candidate compares smaller: link it into the pending
                // "smaller" slot and continue with its "larger" child.
                unsafe { *chain_ptr.add(smaller_slot) = match_stored };
                common_length_smaller = match_len;
                if candidate_abs <= bt_low {
                    // Mark the slot as already resolved so it is not cleared below.
                    smaller_slot = usize::MAX;
                    break;
                }
                smaller_slot = next_pair_idx + 1;
                match_stored = next_larger;
            } else {
                // Candidate compares larger (or equal): mirror image of the above.
                unsafe { *chain_ptr.add(larger_slot) = match_stored };
                common_length_larger = match_len;
                if candidate_abs <= bt_low {
                    larger_slot = usize::MAX;
                    break;
                }
                larger_slot = next_pair_idx;
                match_stored = next_smaller;
            }
        }

        // Terminate whichever pending slots were not resolved during the walk.
        if smaller_slot != usize::MAX {
            unsafe {
                *chain_ptr.add(smaller_slot) = $crate::encoding::match_table::storage::HC_EMPTY
            };
        }
        if larger_slot != usize::MAX {
            unsafe {
                *chain_ptr.add(larger_slot) = $crate::encoding::match_table::storage::HC_EMPTY
            };
        }

        // Second, read-only walk over the table returned by `dms.table()`,
        // when there is one. Nothing in that table is modified here.
        if let Some(dms) = $table.dms.table() {
            let region = $table.dms.region_len();
            let dh = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
                concat,
                idx,
                dms.hash_log,
                dms.mls,
            );
            let mut dcur = dms.hash_table[dh];
            let mut common_smaller = 0usize;
            let mut common_larger = 0usize;
            // Independent compare budget for this second walk.
            let mut dms_compares = $profile.max_chain_depth.min($search_depth);
            while dms_compares > 0 && dcur != $crate::encoding::match_table::storage::HC_EMPTY {
                // Entries are stored as `index + 1`, like the main table.
                let dict_idx = (dcur - 1) as usize;
                // Stop at entries outside the region or not before the
                // current index.
                if dict_idx >= region || dict_idx >= idx {
                    break;
                }
                dms_compares -= 1;
                let pair = 2 * dict_idx;
                let seed = common_smaller.min(common_larger);
                let match_len = unsafe { $cmf(concat, idx, dict_idx, tail_limit, seed) };
                if match_len > best_len {
                    // `dict_idx` indexes `concat` directly, so the offset is
                    // a plain index difference.
                    let offset = idx - dict_idx;
                    let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
                        $out,
                        $best_len_for_skip,
                        $crate::encoding::opt::types::MatchCandidate {
                            start: $abs_pos,
                            offset,
                            match_len,
                        },
                        $min_match_len,
                    );
                    if accepted {
                        best_len = match_len;
                        // Unlike the main walk, the end is measured from the
                        // current position rather than from the candidate.
                        let candidate_end = $abs_pos + match_len;
                        if candidate_end > match_end_abs {
                            match_end_abs = candidate_end;
                        }
                        if match_len > $crate::encoding::cost_model::HC_OPT_NUM {
                            break;
                        }
                    }
                }
                if match_len >= tail_limit {
                    break;
                }
                // Bounds-checked byte compare picks the child to follow.
                if concat[dict_idx + match_len] < concat[idx + match_len] {
                    common_smaller = match_len;
                    dcur = dms.chain_table[pair + 1];
                } else {
                    common_larger = match_len;
                    dcur = dms.chain_table[pair];
                }
            }
        }

        // Cannot underflow: `match_end_abs` never drops below `$abs_pos + 9`.
        $table.skip_insert_until_abs = match_end_abs - 8;
    }};
}
5175pub(crate) use bt_insert_and_collect_matches_body;
5176
5177impl HcMatchGenerator {
5178 fn heap_size(&self) -> usize {
5181 self.table.heap_size() + self.backend.heap_size()
5182 }
5183
5184 fn should_run_btultra2_seed_pass<S: super::strategy::Strategy>(
5185 &self,
5186 current_len: usize,
5187 ) -> bool {
5188 if !S::TWO_PASS_SEED {
5194 return false;
5195 }
5196 let HcBackend::Bt(bt) = &self.backend else {
5197 return false;
5198 };
5199 bt.opt_state.lit_length_sum == 0
5200 && bt.opt_state.dictionary_seed.is_none()
5201 && !self.table.dictionary_primed_for_frame
5202 && bt.ldm_sequences.is_empty()
5203 && self.table.window_size == current_len
5204 && self.table.history_abs_start == 0
5205 && self.table.chunk_lens.len() == 1
5206 && current_len > HC_PREDEF_THRESHOLD
5207 }
5208
5209 fn new(max_window_size: usize) -> Self {
5210 Self {
5211 table: super::match_table::storage::MatchTable::new(max_window_size),
5212 hc: super::hc::HcMatcher::new(2, HC_SEARCH_DEPTH, HC_TARGET_LEN),
5213 backend: HcBackend::Hc,
5216 strategy_tag: super::strategy::StrategyTag::Lazy,
5223 }
5224 }
5225
5226 fn configure(&mut self, config: HcConfig, tag: super::strategy::StrategyTag, window_log: u8) {
5227 use super::strategy::StrategyTag;
5228 self.strategy_tag = tag;
5232 let is_btultra2 = tag == StrategyTag::BtUltra2;
5233 let uses_bt = matches!(
5234 tag,
5235 StrategyTag::Btlazy2
5236 | StrategyTag::BtOpt
5237 | StrategyTag::BtUltra
5238 | StrategyTag::BtUltra2
5239 );
5240 let wants_hash3 = matches!(tag, StrategyTag::BtUltra | StrategyTag::BtUltra2);
5245 let next_hash3_log = if wants_hash3 {
5246 HC3_HASH_LOG.min(window_log as usize)
5247 } else {
5248 0
5249 };
5250 let resize = self.table.hash_log != config.hash_log
5251 || self.table.chain_log != config.chain_log
5252 || self.table.hash3_log != next_hash3_log;
5253 self.table.hash_log = config.hash_log;
5254 self.table.chain_log = config.chain_log;
5255 self.table.hash3_log = next_hash3_log;
5256 self.hc.search_depth = if uses_bt {
5257 config.search_depth
5258 } else {
5259 config.search_depth.min(MAX_HC_SEARCH_DEPTH)
5260 };
5261 self.hc.target_len = config.target_len;
5262 self.table.search_depth = self.hc.search_depth;
5266 self.table.is_btultra2 = is_btultra2;
5267 self.table.uses_bt = uses_bt;
5268 self.table.search_mls = config.search_mls;
5276 match (&self.backend, self.table.uses_bt) {
5280 (HcBackend::Hc, true) => {
5281 self.backend = HcBackend::Bt(alloc::boxed::Box::new(super::bt::BtMatcher::new()));
5282 }
5283 (HcBackend::Bt(_), false) => {
5284 self.backend = HcBackend::Hc;
5285 }
5286 _ => {}
5287 }
5288 if resize && !self.table.hash_table.is_empty() {
5289 self.table.hash_table.clear();
5291 self.table.hash3_table.clear();
5292 self.table.chain_table.clear();
5293 }
5294 }
5295
5296 fn seed_dictionary_entropy(
5297 &mut self,
5298 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
5299 ll: Option<&crate::fse::fse_encoder::FSETable>,
5300 ml: Option<&crate::fse::fse_encoder::FSETable>,
5301 of: Option<&crate::fse::fse_encoder::FSETable>,
5302 ) {
5303 if let HcBackend::Bt(bt) = &mut self.backend {
5304 bt.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
5305 }
5306 }
5307
/// Stores `producer` on the BT backend. When the backend is not `Bt` the
/// value is simply dropped.
#[cfg(feature = "hash")]
fn set_ldm_producer(&mut self, producer: Option<super::ldm::LdmProducer>) {
    let HcBackend::Bt(bt) = &mut self.backend else {
        return;
    };
    bt.ldm_producer = producer;
}
5318
/// Moves the LDM producer out of the BT backend, leaving `None` behind.
/// Returns `None` when the backend is not `Bt`.
#[cfg(feature = "hash")]
fn take_ldm_producer(&mut self) -> Option<super::ldm::LdmProducer> {
    match &mut self.backend {
        HcBackend::Bt(bt) => bt.ldm_producer.take(),
        _ => None,
    }
}
5332
5333 fn reset(&mut self, reuse_space: impl FnMut(Vec<u8>)) {
5334 self.table.reset(reuse_space);
5335 if let HcBackend::Bt(bt) = &mut self.backend {
5336 bt.reset();
5337 }
5338 }
5339
5340 fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
5343 self.table.skip_matching(incompressible_hint);
5344 }
5345
/// Test-only entry point that picks the matching routine from the stored
/// `strategy_tag`.
///
/// `Fast`, `Dfast`, `Greedy` and `Lazy` all run the lazy hash-chain loop,
/// `Btlazy2` has its own routine, and the three optimal strategies run
/// `start_matching_optimal` with the corresponding strategy type.
#[cfg(test)]
fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
    use super::strategy::{self, StrategyTag};
    let sink = &mut handle_sequence;
    match self.strategy_tag {
        StrategyTag::BtUltra2 => self.start_matching_optimal::<strategy::BtUltra2>(sink),
        StrategyTag::BtUltra => self.start_matching_optimal::<strategy::BtUltra>(sink),
        StrategyTag::BtOpt => self.start_matching_optimal::<strategy::BtOpt>(sink),
        StrategyTag::Btlazy2 => self.start_matching_btlazy2(sink),
        StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
            self.start_matching_lazy(sink)
        }
    }
}
5375
5376 pub(crate) fn start_matching_strategy<S: super::strategy::Strategy>(
5387 &mut self,
5388 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
5389 ) {
5390 debug_assert_eq!(
5391 self.table.uses_bt,
5392 S::USE_BT,
5393 "Strategy::USE_BT disagrees with runtime table.uses_bt at HC dispatch"
5394 );
5395 if S::USE_BT {
5396 self.start_matching_optimal::<S>(handle_sequence)
5397 } else {
5398 self.start_matching_lazy(handle_sequence)
5399 }
5400 }
5401
5402 pub(crate) fn start_matching_lazy(
5403 &mut self,
5404 mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
5405 ) {
5406 self.table.ensure_tables();
5407
5408 let (current_abs_start, current_len) = self.table.current_block_range();
5411 if current_len == 0 {
5412 return;
5413 }
5414 let current_ptr = self.table.get_last_space().as_ptr();
5421 let current: &[u8] = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
5422
5423 let current_abs_end = current_abs_start + current_len;
5424 self.table
5425 .backfill_boundary_positions(current_abs_start, current_abs_end);
5426
5427 let mut pos = 0usize;
5428 let mut literals_start = 0usize;
5429 while pos + HC_MIN_MATCH_LEN <= current_len {
5430 let abs_pos = current_abs_start + pos;
5431 let lit_len = pos - literals_start;
5432
5433 let best = self.hc.find_best_match(&self.table, abs_pos, lit_len);
5434 if let Some(candidate) = self.hc.pick_lazy_match(&self.table, abs_pos, lit_len, best) {
5435 self.table
5436 .insert_match_span(abs_pos, candidate.start + candidate.match_len);
5437 let start = candidate.start - current_abs_start;
5438 let literals = ¤t[literals_start..start];
5439 handle_sequence(Sequence::Triple {
5440 literals,
5441 offset: candidate.offset,
5442 match_len: candidate.match_len,
5443 });
5444 let _ = encode_offset_with_history(
5445 candidate.offset as u32,
5446 literals.len() as u32,
5447 &mut self.table.offset_hist,
5448 );
5449 pos = start + candidate.match_len;
5450 literals_start = pos;
5451 } else {
5452 self.table.insert_position(abs_pos);
5453 pos += 1;
5454 }
5455 }
5456
5457 while pos + 4 <= current_len {
5460 self.table.insert_position(current_abs_start + pos);
5461 pos += 1;
5462 }
5463
5464 if literals_start < current_len {
5465 handle_sequence(Sequence::Literals {
5466 literals: ¤t[literals_start..],
5467 });
5468 }
5469 }
5470
5471 pub(crate) unsafe fn set_borrowed_window(&mut self, buffer: &[u8]) {
5475 unsafe { self.table.set_borrowed_window(buffer) };
5477 }
5478
5479 pub(crate) fn clear_borrowed_window(&mut self) {
5480 self.table.clear_borrowed_window();
5481 }
5482
5483 pub(crate) fn start_matching_lazy_borrowed(
5489 &mut self,
5490 block_start: usize,
5491 block_end: usize,
5492 handle_sequence: impl for<'a> FnMut(Sequence<'a>),
5493 ) {
5494 self.table.stage_borrowed_block(block_start, block_end);
5495 self.start_matching_lazy(handle_sequence);
5496 }
5497
5498 pub(crate) fn skip_matching_borrowed(
5501 &mut self,
5502 block_start: usize,
5503 block_end: usize,
5504 incompressible_hint: Option<bool>,
5505 ) {
5506 self.table.stage_borrowed_block(block_start, block_end);
5507 self.table.skip_matching(incompressible_hint);
5508 }
5509
5510 fn start_matching_btlazy2(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
5518 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
5519 unsafe {
5520 self.start_matching_btlazy2_neon(&mut handle_sequence)
5521 }
5522 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
5523 {
5524 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
5525 match select_kernel() {
5526 FastpathKernel::Avx2Bmi2 => unsafe {
5527 self.start_matching_btlazy2_avx2_bmi2(&mut handle_sequence)
5528 },
5529 FastpathKernel::Sse42 => unsafe {
5530 self.start_matching_btlazy2_sse42(&mut handle_sequence)
5531 },
5532 FastpathKernel::Scalar => self.start_matching_btlazy2_scalar(&mut handle_sequence),
5533 }
5534 }
5535 #[cfg(not(any(
5536 all(target_arch = "aarch64", target_endian = "little"),
5537 target_arch = "x86",
5538 target_arch = "x86_64"
5539 )))]
5540 {
5541 self.start_matching_btlazy2_scalar(&mut handle_sequence)
5542 }
5543 }
5544
/// NEON instantiation of the btlazy2 matching routine (little-endian aarch64
/// only).
///
/// The whole body is the shared `start_matching_btlazy2_body!` macro, expanded
/// with the NEON candidate collector and the NEON match counter.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the running CPU supports NEON.
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
unsafe fn start_matching_btlazy2_neon(
    &mut self,
    mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) {
    start_matching_btlazy2_body!(
        self,
        handle_sequence,
        collect_optimal_candidates_initialized_neon,
        crate::encoding::fastpath::neon::count_match_from_indices
    )
}
5558
/// SSE4.2 instantiation of the btlazy2 matching routine (x86 / x86_64 only).
///
/// The whole body is the shared `start_matching_btlazy2_body!` macro, expanded
/// with the SSE4.2 candidate collector and the SSE4.2 match counter.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "sse4.2")]`; the caller must
/// ensure the running CPU supports SSE4.2.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
unsafe fn start_matching_btlazy2_sse42(
    &mut self,
    mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) {
    start_matching_btlazy2_body!(
        self,
        handle_sequence,
        collect_optimal_candidates_initialized_sse42,
        crate::encoding::fastpath::sse42::count_match_from_indices
    )
}
5572
/// AVX2+BMI2 instantiation of the btlazy2 matching routine (x86 / x86_64
/// only).
///
/// The whole body is the shared `start_matching_btlazy2_body!` macro, expanded
/// with the AVX2/BMI2 candidate collector and the AVX2/BMI2 match counter.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "avx2,bmi2")]`; the caller must
/// ensure the running CPU supports both AVX2 and BMI2.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
unsafe fn start_matching_btlazy2_avx2_bmi2(
    &mut self,
    mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) {
    start_matching_btlazy2_body!(
        self,
        handle_sequence,
        collect_optimal_candidates_initialized_avx2_bmi2,
        crate::encoding::fastpath::avx2_bmi2::count_match_from_indices
    )
}
5586
/// Scalar instantiation of the btlazy2 matching routine. Compiled on every
/// target except little-endian aarch64.
///
/// The whole body is the shared `start_matching_btlazy2_body!` macro, expanded
/// with the scalar candidate collector and the scalar match counter.
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
// NOTE(review): presumably needed because the shared macro body wraps kernel
// calls in `unsafe`, which is redundant for the scalar kernel; confirm.
#[allow(unused_unsafe)]
fn start_matching_btlazy2_scalar(
    &mut self,
    mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) {
    start_matching_btlazy2_body!(
        self,
        handle_sequence,
        collect_optimal_candidates_initialized_scalar,
        crate::encoding::fastpath::scalar::count_match_from_indices
    )
}
5604
5605 fn start_matching_optimal<S: super::strategy::Strategy>(
5606 &mut self,
5607 mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
5608 ) {
5609 self.table.ensure_tables();
5610 let (current_abs_start, current_len) = self.table.current_block_range();
5613 if current_len == 0 {
5614 return;
5615 }
5616 let current_ptr = self.table.get_last_space().as_ptr();
5617 let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
5621
5622 let current_abs_end = current_abs_start + current_len;
5623 self.table
5624 .apply_limited_update_after_long_match(current_abs_start);
5625 let hash3_start_cursor = self
5626 .table
5627 .skip_insert_until_abs
5628 .max(self.table.history_abs_start);
5629 self.table
5630 .backfill_boundary_positions(current_abs_start, current_abs_end);
5631 self.table.next_to_update3 = hash3_start_cursor;
5632 let live_history = self.table.live_history();
5647 let history_abs_start = self.table.history_abs_start;
5648 self.backend.bt_mut().prepare_ldm_candidates(
5649 live_history,
5650 history_abs_start,
5651 current_abs_start,
5652 current_len,
5653 );
5654
5655 if self.should_run_btultra2_seed_pass::<S>(current_len) {
5656 self.run_btultra2_seed_pass(current, current_abs_start, current_len);
5657 }
5658
5659 let profile = HcOptimalCostProfile::const_for_strategy::<S>();
5665 let mut opt_state =
5666 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
5667 opt_state.rescale_freqs(current, profile);
5668 let mut best_plan = core::mem::take(&mut self.backend.bt_mut().opt_segment_plan_scratch);
5669 best_plan.clear();
5670 let mut plan_reps = self.table.offset_hist;
5671 let (mut cursor, mut plan_litlen) = self
5672 .table
5673 .donor_opt_start_cursor_and_litlen(current_abs_start);
5674 let mut plan_literals_cursor = 0usize;
5675 let match_loop_limit = current_len.saturating_sub(8);
5676 while cursor < match_loop_limit {
5677 let remaining_len = current_len - cursor;
5678 let segment_abs_start = current_abs_start + cursor;
5679 let segment_start = best_plan.len();
5680 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
5681 ¤t[cursor..],
5682 segment_abs_start,
5683 remaining_len,
5684 HcOptimalPlanState {
5685 block_offset: cursor,
5686 reps: plan_reps,
5687 litlen: plan_litlen,
5688 profile,
5689 },
5690 &opt_state,
5691 &mut best_plan,
5692 );
5693 BtMatcher::update_plan_stats_segment(
5694 current,
5695 current_len,
5696 &best_plan[segment_start..],
5697 &mut plan_literals_cursor,
5698 &mut plan_reps,
5699 &mut opt_state,
5700 profile.accurate,
5701 );
5702 plan_reps = end_reps;
5703 plan_litlen = end_litlen;
5704 cursor += consumed_len;
5705 }
5706
5707 self.table
5708 .emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
5709 best_plan.clear();
5710 self.backend.bt_mut().opt_segment_plan_scratch = best_plan;
5711 self.backend.bt_mut().opt_state = opt_state;
5712 }
5713
5714 fn run_btultra2_seed_pass(
5715 &mut self,
5716 current: &[u8],
5717 current_abs_start: usize,
5718 current_len: usize,
5719 ) {
5720 type S = super::strategy::BtUltra2;
5725 let seed_profile = HcOptimalCostProfile::const_for_strategy::<S>();
5726 let mut opt_state =
5727 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
5728 opt_state.rescale_freqs(current, seed_profile);
5729 let mut seed_reps = self.table.offset_hist;
5730 let (mut cursor, mut seed_litlen) = self
5731 .table
5732 .donor_opt_start_cursor_and_litlen(current_abs_start);
5733 let mut seed_literals_cursor = 0usize;
5734 let mut seed_plan = core::mem::take(&mut self.backend.bt_mut().opt_seed_plan_scratch);
5735 seed_plan.clear();
5736 let match_loop_limit = current_len.saturating_sub(8);
5737 while cursor < match_loop_limit {
5738 let remaining_len = current_len - cursor;
5739 let segment_abs_start = current_abs_start + cursor;
5740 let segment_start = seed_plan.len();
5741 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
5742 ¤t[cursor..],
5743 segment_abs_start,
5744 remaining_len,
5745 HcOptimalPlanState {
5746 block_offset: cursor,
5747 reps: seed_reps,
5748 litlen: seed_litlen,
5749 profile: seed_profile,
5750 },
5751 &opt_state,
5752 &mut seed_plan,
5753 );
5754 BtMatcher::update_plan_stats_segment(
5755 current,
5756 current_len,
5757 &seed_plan[segment_start..],
5758 &mut seed_literals_cursor,
5759 &mut seed_reps,
5760 &mut opt_state,
5761 seed_profile.accurate,
5762 );
5763 seed_plan.truncate(segment_start);
5764 seed_reps = end_reps;
5765 seed_litlen = end_litlen;
5766 cursor += consumed_len;
5767 }
5768 seed_plan.clear();
5769 self.backend.bt_mut().opt_seed_plan_scratch = seed_plan;
5770 self.backend.bt_mut().opt_state = opt_state;
5771
5772 self.table.position_base = self.table.history_abs_start;
5775 self.table.index_shift = current_len;
5776 self.table.next_to_update3 = current_abs_start;
5777 self.table.skip_insert_until_abs = current_abs_start;
5778 self.table.allow_zero_relative_position = true;
5784 }
5785
5786 fn build_optimal_plan<S: super::strategy::Strategy>(
5787 &mut self,
5788 current: &[u8],
5789 current_abs_start: usize,
5790 current_len: usize,
5791 initial_state: HcOptimalPlanState,
5792 stats: &HcOptState,
5793 out: &mut Vec<HcOptimalSequence>,
5794 ) -> (u32, [u32; 3], usize, usize) {
5795 debug_assert!(S::USE_BT, "build_optimal_plan called on non-BT strategy");
5796 debug_assert_eq!(initial_state.profile.accurate, S::ACCURATE_PRICE);
5797 debug_assert_eq!(
5798 initial_state.profile.favor_small_offsets,
5799 S::FAVOR_SMALL_OFFSETS
5800 );
5801 match (S::ACCURATE_PRICE, S::FAVOR_SMALL_OFFSETS) {
5811 (true, false) => self.build_optimal_plan_impl::<S, true, false>(
5812 current,
5813 current_abs_start,
5814 current_len,
5815 initial_state,
5816 stats,
5817 out,
5818 ),
5819 (true, true) => self.build_optimal_plan_impl::<S, true, true>(
5820 current,
5821 current_abs_start,
5822 current_len,
5823 initial_state,
5824 stats,
5825 out,
5826 ),
5827 (false, false) => self.build_optimal_plan_impl::<S, false, false>(
5828 current,
5829 current_abs_start,
5830 current_len,
5831 initial_state,
5832 stats,
5833 out,
5834 ),
5835 (false, true) => self.build_optimal_plan_impl::<S, false, true>(
5836 current,
5837 current_abs_start,
5838 current_len,
5839 initial_state,
5840 stats,
5841 out,
5842 ),
5843 }
5844 }
5845
5846 #[inline(always)]
5855 fn build_optimal_plan_impl<
5856 S: super::strategy::Strategy,
5857 const ACCURATE_PRICE: bool,
5858 const FAVOR_SMALL_OFFSETS: bool,
5859 >(
5860 &mut self,
5861 current: &[u8],
5862 current_abs_start: usize,
5863 current_len: usize,
5864 initial_state: HcOptimalPlanState,
5865 stats: &HcOptState,
5866 out: &mut Vec<HcOptimalSequence>,
5867 ) -> (u32, [u32; 3], usize, usize) {
5868 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
5869 unsafe {
5870 self.build_optimal_plan_impl_neon::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
5871 current,
5872 current_abs_start,
5873 current_len,
5874 initial_state,
5875 stats,
5876 out,
5877 )
5878 }
5879 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
5880 {
5881 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
5882 match select_kernel() {
5883 FastpathKernel::Avx2Bmi2 => unsafe {
5884 self.build_optimal_plan_impl_avx2_bmi2::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
5885 current,
5886 current_abs_start,
5887 current_len,
5888 initial_state,
5889 stats,
5890 out,
5891 )
5892 },
5893 FastpathKernel::Sse42 => unsafe {
5894 self.build_optimal_plan_impl_sse42::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
5895 current,
5896 current_abs_start,
5897 current_len,
5898 initial_state,
5899 stats,
5900 out,
5901 )
5902 },
5903 FastpathKernel::Scalar => self
5904 .build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
5905 current,
5906 current_abs_start,
5907 current_len,
5908 initial_state,
5909 stats,
5910 out,
5911 ),
5912 }
5913 }
5914 #[cfg(not(any(
5915 all(target_arch = "aarch64", target_endian = "little"),
5916 target_arch = "x86",
5917 target_arch = "x86_64"
5918 )))]
5919 {
5920 self.build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
5921 current,
5922 current_abs_start,
5923 current_len,
5924 initial_state,
5925 stats,
5926 out,
5927 )
5928 }
5929 }
5930
/// NEON instantiation of the optimal planner (little-endian aarch64 only).
///
/// The whole body is the shared `build_optimal_plan_impl_body!` macro,
/// expanded with the NEON candidate collector.
/// NOTE(review): `ACCURATE_PRICE` and `FAVOR_SMALL_OFFSETS` are not passed to
/// the macro; presumably its body names them directly. Confirm.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the running CPU supports NEON.
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
unsafe fn build_optimal_plan_impl_neon<
    S: super::strategy::Strategy,
    const ACCURATE_PRICE: bool,
    const FAVOR_SMALL_OFFSETS: bool,
>(
    &mut self,
    current: &[u8],
    current_abs_start: usize,
    current_len: usize,
    initial_state: HcOptimalPlanState,
    stats: &HcOptState,
    out: &mut Vec<HcOptimalSequence>,
) -> (u32, [u32; 3], usize, usize) {
    build_optimal_plan_impl_body!(
        self,
        S,
        current,
        current_abs_start,
        current_len,
        initial_state,
        stats,
        out,
        collect_optimal_candidates_initialized_neon,
    )
}
5961
/// SSE4.2 instantiation of the optimal planner (x86 / x86_64 only).
///
/// The whole body is the shared `build_optimal_plan_impl_body!` macro,
/// expanded with the SSE4.2 candidate collector.
/// NOTE(review): `ACCURATE_PRICE` and `FAVOR_SMALL_OFFSETS` are not passed to
/// the macro; presumably its body names them directly. Confirm.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "sse4.2")]`; the caller must
/// ensure the running CPU supports SSE4.2.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
unsafe fn build_optimal_plan_impl_sse42<
    S: super::strategy::Strategy,
    const ACCURATE_PRICE: bool,
    const FAVOR_SMALL_OFFSETS: bool,
>(
    &mut self,
    current: &[u8],
    current_abs_start: usize,
    current_len: usize,
    initial_state: HcOptimalPlanState,
    stats: &HcOptState,
    out: &mut Vec<HcOptimalSequence>,
) -> (u32, [u32; 3], usize, usize) {
    build_optimal_plan_impl_body!(
        self,
        S,
        current,
        current_abs_start,
        current_len,
        initial_state,
        stats,
        out,
        collect_optimal_candidates_initialized_sse42,
    )
}
5989
/// AVX2+BMI2 instantiation of the optimal planner (x86 / x86_64 only).
///
/// The whole body is the shared `build_optimal_plan_impl_body!` macro,
/// expanded with the AVX2/BMI2 candidate collector.
/// NOTE(review): `ACCURATE_PRICE` and `FAVOR_SMALL_OFFSETS` are not passed to
/// the macro; presumably its body names them directly. Confirm.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "avx2,bmi2")]`; the caller must
/// ensure the running CPU supports both AVX2 and BMI2.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
unsafe fn build_optimal_plan_impl_avx2_bmi2<
    S: super::strategy::Strategy,
    const ACCURATE_PRICE: bool,
    const FAVOR_SMALL_OFFSETS: bool,
>(
    &mut self,
    current: &[u8],
    current_abs_start: usize,
    current_len: usize,
    initial_state: HcOptimalPlanState,
    stats: &HcOptState,
    out: &mut Vec<HcOptimalSequence>,
) -> (u32, [u32; 3], usize, usize) {
    build_optimal_plan_impl_body!(
        self,
        S,
        current,
        current_abs_start,
        current_len,
        initial_state,
        stats,
        out,
        collect_optimal_candidates_initialized_avx2_bmi2,
    )
}
6017
    /// Portable (no `#[target_feature]`) instantiation of the optimal-plan builder.
    ///
    /// Compiled on every target except little-endian aarch64. The entire body is
    /// generated by `build_optimal_plan_impl_body!`, with
    /// `collect_optimal_candidates_initialized_scalar` as the candidate collector.
    ///
    /// `ACCURATE_PRICE` and `FAVOR_SMALL_OFFSETS` are not passed as macro
    /// arguments; presumably the macro body refers to them by name — confirm
    /// against the macro definition. The meaning of the returned tuple is defined
    /// by the macro body, which is not visible from here.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // NOTE(review): presumably needed because the shared macro body contains
    // `unsafe` blocks that are redundant in some instantiations — confirm.
    #[allow(unused_unsafe)]
    fn build_optimal_plan_impl_scalar<
        S: super::strategy::Strategy,
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            S,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_scalar,
        )
    }
6048
    /// Test-only entry point: collects match candidates at `abs_pos` using the
    /// strategy currently recorded in `self.strategy_tag`.
    ///
    /// Calls `self.table.ensure_tables()` first, then forwards all arguments to
    /// `collect_optimal_candidates_initialized` instantiated with the `Strategy`
    /// type that corresponds to the tag.
    #[cfg(test)]
    fn collect_optimal_candidates(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        use super::strategy::{self, StrategyTag};
        self.table.ensure_tables();
        match self.strategy_tag {
            // The four binary-tree tags each map to their own strategy type and
            // set `USE_BT_MATCHFINDER = true`.
            StrategyTag::BtUltra2 => self
                .collect_optimal_candidates_initialized::<strategy::BtUltra2, true>(
                    abs_pos,
                    current_abs_end,
                    profile,
                    query,
                    out,
                ),
            StrategyTag::BtUltra => self
                .collect_optimal_candidates_initialized::<strategy::BtUltra, true>(
                    abs_pos,
                    current_abs_end,
                    profile,
                    query,
                    out,
                ),
            StrategyTag::Btlazy2 => self
                .collect_optimal_candidates_initialized::<strategy::Btlazy2, true>(
                    abs_pos,
                    current_abs_end,
                    profile,
                    query,
                    out,
                ),
            StrategyTag::BtOpt => self
                .collect_optimal_candidates_initialized::<strategy::BtOpt, true>(
                    abs_pos,
                    current_abs_end,
                    profile,
                    query,
                    out,
                ),
            // Every remaining tag shares the `strategy::Lazy` instantiation with
            // `USE_BT_MATCHFINDER = false`.
            StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
                self.collect_optimal_candidates_initialized::<strategy::Lazy, false>(
                    abs_pos,
                    current_abs_end,
                    profile,
                    query,
                    out,
                )
            }
        }
    }
6109
    /// Dispatches candidate collection to the kernel variant for the build target.
    ///
    /// * little-endian aarch64: always the NEON variant;
    /// * x86 / x86_64: the variant chosen by `fastpath::select_kernel()`
    ///   (AVX2+BMI2, SSE4.2, or scalar);
    /// * any other target: the scalar variant.
    ///
    /// All arguments are forwarded unchanged. Unlike the test-only
    /// `collect_optimal_candidates`, this function does not call
    /// `ensure_tables()` itself.
    #[allow(dead_code)]
    #[inline(always)]
    fn collect_optimal_candidates_initialized<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        // SAFETY: NOTE(review) — no runtime feature check is visible here;
        // presumably NEON is assumed as the baseline on aarch64 targets. Confirm.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            self.collect_optimal_candidates_initialized_neon::<S, USE_BT_MATCHFINDER>(
                abs_pos,
                current_abs_end,
                profile,
                query,
                out,
            )
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                // SAFETY: reached only when `select_kernel()` reports `Avx2Bmi2`;
                // presumably that implies AVX2 and BMI2 were detected at runtime —
                // confirm in `fastpath::select_kernel`.
                FastpathKernel::Avx2Bmi2 => unsafe {
                    self.collect_optimal_candidates_initialized_avx2_bmi2::<S, USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    )
                },
                // SAFETY: reached only when `select_kernel()` reports `Sse42`;
                // presumably that implies SSE4.2 was detected at runtime —
                // confirm in `fastpath::select_kernel`.
                FastpathKernel::Sse42 => unsafe {
                    self.collect_optimal_candidates_initialized_sse42::<S, USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    )
                },
                FastpathKernel::Scalar => self
                    .collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    ),
            }
        }
        // Fallback for targets with no SIMD variant in this file.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
                abs_pos,
                current_abs_end,
                profile,
                query,
                out,
            )
        }
    }
6189
    /// NEON instantiation of the candidate collector (little-endian aarch64 only).
    ///
    /// The body is generated by `collect_optimal_candidates_initialized_body!`,
    /// which is handed the `_neon` variants of the helper routines
    /// (`bt_update_tree_until`, `bt_insert_and_collect_matches`,
    /// `for_each_repcode_candidate_with_reps`, `hash3_candidate`) and the NEON
    /// `common_prefix_len_ptr` from `crate::encoding::fastpath::neon`.
    ///
    /// # Safety
    ///
    /// Compiled with `#[target_feature(enable = "neon")]`; the caller must only
    /// invoke it on a CPU that supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn collect_optimal_candidates_initialized_neon<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_neon,
            bt_insert_and_collect_matches_neon,
            for_each_repcode_candidate_with_reps_neon,
            hash3_candidate_neon,
            crate::encoding::fastpath::neon::common_prefix_len_ptr,
        )
    }
6224
    /// SSE4.2 instantiation of the candidate collector (x86 / x86_64 only).
    ///
    /// The body is generated by `collect_optimal_candidates_initialized_body!`,
    /// which is handed the `_sse42` variants of the helper routines
    /// (`bt_update_tree_until`, `bt_insert_and_collect_matches`,
    /// `for_each_repcode_candidate_with_reps`, `hash3_candidate`) and the SSE4.2
    /// `common_prefix_len_ptr` from `crate::encoding::fastpath::sse42`.
    ///
    /// # Safety
    ///
    /// Compiled with `#[target_feature(enable = "sse4.2")]`; the caller must only
    /// invoke it on a CPU that supports SSE4.2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn collect_optimal_candidates_initialized_sse42<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_sse42,
            bt_insert_and_collect_matches_sse42,
            for_each_repcode_candidate_with_reps_sse42,
            hash3_candidate_sse42,
            crate::encoding::fastpath::sse42::common_prefix_len_ptr,
        )
    }
6254
    /// AVX2 + BMI2 instantiation of the candidate collector (x86 / x86_64 only).
    ///
    /// The body is generated by `collect_optimal_candidates_initialized_body!`,
    /// which is handed the `_avx2_bmi2` variants of the helper routines
    /// (`bt_update_tree_until`, `bt_insert_and_collect_matches`,
    /// `for_each_repcode_candidate_with_reps`, `hash3_candidate`) and the
    /// `common_prefix_len_ptr` from `crate::encoding::fastpath::avx2_bmi2`.
    ///
    /// # Safety
    ///
    /// Compiled with `#[target_feature(enable = "avx2,bmi2")]`; the caller must
    /// only invoke it on a CPU that supports both AVX2 and BMI2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_avx2_bmi2,
            bt_insert_and_collect_matches_avx2_bmi2,
            for_each_repcode_candidate_with_reps_avx2_bmi2,
            hash3_candidate_avx2_bmi2,
            crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
        )
    }
6284
    /// Portable (no `#[target_feature]`) instantiation of the candidate collector.
    ///
    /// Compiled on every target except little-endian aarch64. The body is
    /// generated by `collect_optimal_candidates_initialized_body!`, which is
    /// handed the `_scalar` variants of the helper routines and the
    /// `common_prefix_len_ptr` from `crate::encoding::fastpath::scalar`.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // NOTE(review): presumably needed because the shared macro body contains
    // `unsafe` blocks that are redundant in some instantiations — confirm.
    #[allow(unused_unsafe)]
    fn collect_optimal_candidates_initialized_scalar<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_scalar,
            bt_insert_and_collect_matches_scalar,
            for_each_repcode_candidate_with_reps_scalar,
            hash3_candidate_scalar,
            crate::encoding::fastpath::scalar::common_prefix_len_ptr,
        )
    }
6316}
6317
// Round-trip test for `MatchGenerator`: feeds several buffers, replays every
// emitted sequence into `reconstructed`, and finally compares it with the
// concatenation of everything that was fed in.
//
// `#[cfg(any())]` is always false, so this test is never compiled.
// NOTE(review): `MatchGenerator` and `SuffixStore` are not among the imports
// visible at the top of the file — presumably this test targets an older API;
// confirm before re-enabling.
#[cfg(any())] #[test]
fn matches() {
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Applies one sequence to `reconstructed`, checking basic invariants
    // (non-empty literal-only runs, offset > 0, match_len >= MIN_MATCH_LEN,
    // offset within the bytes already reconstructed).
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            // Byte-by-byte copy so that a match may overlap the bytes it is
            // producing (offset < match_len).
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Buffer 1: ten zero bytes; exactly one sequence is expected.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 2: "1..6" three times followed by five zeros; three sequences.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 3: "1..11" followed by five zeros; two sequences.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 4: five zeros; one sequence.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 5: "7..11"; one sequence.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 6 is skipped via `skip_matching()`, so no sequence is emitted for
    // it and its bytes are appended to `reconstructed` by hand.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 7: same bytes as the skipped buffer; one sequence.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Buffer 8: a five-byte run repeated inside the same buffer; two sequences.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
6438
// Feeds two identical 128 KiB blocks of a repeating 10-byte pattern through the
// dfast matcher and checks that replaying the emitted sequences reproduces each
// block exactly.
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Applies one sequence to `decoded`. The match is copied byte by byte so
    // that it may overlap the bytes it is producing (offset < match_len).
    fn replay_into(decoded: &mut Vec<u8>, seq: Sequence<'_>) {
        match seq {
            Sequence::Literals { literals } => decoded.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                decoded.extend_from_slice(literals);
                let from = decoded.len() - offset;
                for src in from..from + match_len {
                    let byte = decoded[src];
                    decoded.push(byte);
                }
            }
        }
    }

    const BLOCK_LEN: usize = 128 * 1024;
    let pattern: [u8; 10] = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let make_block = || -> Vec<u8> { pattern.iter().copied().cycle().take(BLOCK_LEN).collect() };
    let first_block = make_block();
    let second_block = make_block();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let mut history: Vec<u8> = Vec::new();

    // Block 1: everything replayed so far must equal the block itself.
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_into(&mut history, seq));
    assert_eq!(history, first_block);

    // Block 2: only the bytes appended after the first block are compared.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_into(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
6473
// Pins the dfast matcher's acceptance of a match that is exactly five bytes
// long: the fixture contains "ABCDE" twice, 28 bytes apart, and the byte that
// follows each occurrence differs ('!' vs 'F'), so the match cannot extend.
#[test]
fn dfast_accepts_exact_five_byte_match() {
    // Fixture layout (0-based indices):
    //   0        'Z'
    //   1..=5    "ABCDE"   (match source)
    //   6..=28   23 x '!'
    //   29..=33  "ABCDE"   (match target, offset 29 - 1 = 28)
    //   34       'F'       (differs from index 6, ends the match at length 5)
    //   35..=54  "GHIJKLMNOPQRSTUVWXYZ"
    let mut data = Vec::new();
    data.push(b'Z');
    data.extend_from_slice(b"ABCDE");
    data.extend_from_slice(b"!!!!!!!!!!!!!!!!!!!!!!!");
    data.extend_from_slice(b"ABCDE");
    data.push(b'F');
    data.extend_from_slice(b"GHIJKLMNOPQRSTUVWXYZ");
    assert_eq!(data.len(), 55);

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    matcher.add_data(data.clone(), |_| {});

    let mut saw_five_byte_match = false;
    let mut saw_longer_match = false;
    matcher.start_matching(|seq| {
        if let Sequence::Triple {
            offset, match_len, ..
        } = seq
        {
            // Only matches at the fixture's offset are of interest.
            if offset == 28 && match_len == 5 {
                saw_five_byte_match = true;
            } else if offset == 28 && match_len > 5 {
                saw_longer_match = true;
            }
        }
    });

    assert!(
        saw_five_byte_match,
        "dfast must accept the exact-5-byte match — a 6-byte floor would skip it"
    );
    assert!(
        !saw_longer_match,
        "fixture pinned to length 5 — byte 34 ('F') must terminate the extension"
    );
}
6545
// Checks that `reset` selects the dfast backend for the default level, that
// data committed after a skipped slice can still be matched and round-tripped,
// and that a later reset to `Fastest` changes the reported window size.
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    // Copies a 12-byte payload into a fresh slice and commits it to the driver.
    fn commit_twelve(driver: &mut MatchGeneratorDriver, payload: &[u8; 12]) {
        let mut slice = driver.get_next_space();
        slice[..12].copy_from_slice(payload);
        slice.truncate(12);
        driver.commit_space(slice);
    }

    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend(), super::strategy::BackendTag::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 21));

    // First slice is committed and then skipped rather than matched.
    commit_twelve(&mut driver, b"abcabcabcabc");
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching_with_hint(None);

    // Second slice carries the same bytes and is actually matched.
    commit_twelve(&mut driver, b"abcabcabcabc");

    // The skipped slice emitted nothing, so the replay buffer is seeded with it.
    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let from = reconstructed.len() - offset;
            for src in from..from + match_len {
                let byte = reconstructed[src];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 19));
}
6587
// After a reset to level 5 the driver must report the Row backend, a greedy
// parse mode, and a row matcher whose `lazy_depth` is zero.
#[test]
fn driver_level5_selects_row_backend() {
    use super::strategy::{BackendTag, ParseMode};

    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(5));

    let backend = driver.active_backend();
    assert_eq!(backend, BackendTag::Row);

    assert_eq!(
        driver.parse,
        ParseMode::Greedy,
        "L5 must route to start_matching_greedy (parse == Greedy)",
    );

    let lazy_depth = driver.row_matcher().lazy_depth;
    assert_eq!(
        lazy_depth, 0,
        "row matcher lazy_depth must mirror the greedy parse mode",
    );
}
6611
// Commits a single slice holding a repeating 9-byte pattern at level 4 and
// checks that the emitted sequences rebuild the input and include at least one
// match.
#[test]
fn driver_level4_greedy_round_trip_single_slice() {
    let input = b"abcdefgh_abcdefgh_abcdefgh_abcdefgh";
    let input_len = input.len();

    let mut driver = MatchGeneratorDriver::new(64, 2);
    driver.reset(CompressionLevel::Level(4));

    let mut slice = driver.get_next_space();
    slice[..input_len].copy_from_slice(input);
    slice.truncate(input_len);
    driver.commit_space(slice);

    let mut reconstructed: Vec<u8> = Vec::new();
    let mut saw_triple = false;
    driver.start_matching(|seq| {
        saw_triple |= matches!(seq, Sequence::Triple { .. });
        match seq {
            Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                reconstructed.extend_from_slice(literals);
                // Byte-wise copy: the match may overlap its own output.
                let from = reconstructed.len() - offset;
                for src in from..from + match_len {
                    let byte = reconstructed[src];
                    reconstructed.push(byte);
                }
            }
        }
    });

    assert_eq!(
        reconstructed,
        input.to_vec(),
        "L4 greedy parse failed to reconstruct repeating-pattern input",
    );
    assert!(
        saw_triple,
        "L4 greedy parse on a repeating pattern must emit at least one match (Triple)",
    );
}
6657
// Commits the same 32-byte chunk as two consecutive slices at level 4. The
// first slice must round-trip on its own; the second must round-trip on top of
// it and contain at least one match whose offset is >= the chunk length.
#[test]
fn driver_level4_greedy_round_trip_cross_slice() {
    // Applies one sequence to `sink`, copying matches byte by byte.
    fn apply(sink: &mut Vec<u8>, seq: Sequence<'_>) {
        match seq {
            Sequence::Literals { literals } => sink.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                sink.extend_from_slice(literals);
                let from = sink.len() - offset;
                for src in from..from + match_len {
                    let byte = sink[src];
                    sink.push(byte);
                }
            }
        }
    }

    // Copies `payload` into a fresh slice and commits it to the driver.
    fn commit(driver: &mut MatchGeneratorDriver, payload: &[u8]) {
        let mut slice = driver.get_next_space();
        slice[..payload.len()].copy_from_slice(payload);
        slice.truncate(payload.len());
        driver.commit_space(slice);
    }

    let mut driver = MatchGeneratorDriver::new(32, 4);
    driver.reset(CompressionLevel::Level(4));
    let chunk = b"the quick brown fox jumps over!!";
    assert_eq!(chunk.len(), 32);

    // Slice 1.
    commit(&mut driver, chunk);
    let mut first_recon: Vec<u8> = Vec::new();
    driver.start_matching(|seq| apply(&mut first_recon, seq));
    assert_eq!(
        first_recon,
        chunk.to_vec(),
        "first slice failed to round-trip"
    );

    // Slice 2, replayed on top of the bytes recovered from slice 1.
    commit(&mut driver, chunk);
    let mut full = first_recon.clone();
    let mut saw_cross_slice_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple { offset, .. } = &seq {
            saw_cross_slice_match |= *offset >= chunk.len();
        }
        apply(&mut full, seq);
    });

    let mut expected = chunk.to_vec();
    expected.extend_from_slice(chunk);
    assert_eq!(
        full, expected,
        "cross-slice L4 greedy parse failed to reconstruct"
    );
    assert!(
        saw_cross_slice_match,
        "L4 greedy parse must match across slice boundaries (history is shared)",
    );
}
6735
// Test-only helpers for forcing a search/parse combination on the driver.
#[cfg(test)]
impl MatchGeneratorDriver {
    /// Stores a `(search, parse)` pair in `self.config_override`.
    pub(crate) fn set_config_override(
        &mut self,
        search: super::strategy::SearchMethod,
        parse: super::strategy::ParseMode,
    ) {
        let pending = (search, parse);
        self.config_override = Some(pending);
    }

    /// Installs a HashChain + Lazy2 override and then resets to `level`.
    pub(crate) fn reset_on_hc_lazy(&mut self, level: CompressionLevel) {
        use super::strategy::{ParseMode, SearchMethod};

        self.set_config_override(SearchMethod::HashChain, ParseMode::Lazy2);
        self.reset(level);
    }
}
6764
// Runs `data` through a fresh driver (optionally with a search/parse override
// installed before the reset), one slice at a time, and returns the bytes
// obtained by replaying every emitted sequence.
#[cfg(test)]
fn drive_roundtrip_with_override(
    level: CompressionLevel,
    over: Option<(super::strategy::SearchMethod, super::strategy::ParseMode)>,
    data: &[u8],
) -> Vec<u8> {
    let mut driver = MatchGeneratorDriver::new(1 << 17, 8);
    if let Some((search, parse)) = over {
        driver.set_config_override(search, parse);
    }
    driver.reset(level);

    let mut out: Vec<u8> = Vec::with_capacity(data.len());
    let mut pending = data;
    while !pending.is_empty() {
        // Fill the next slice with as much of the remaining input as fits.
        let mut slice = driver.get_next_space();
        let take = pending.len().min(slice.len());
        let (head, rest) = pending.split_at(take);
        slice[..take].copy_from_slice(head);
        slice.truncate(take);
        driver.commit_space(slice);
        pending = rest;

        driver.start_matching(|seq| match seq {
            Sequence::Literals { literals } => out.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                out.extend_from_slice(literals);
                // Byte-wise copy: the match may overlap its own output.
                let from = out.len() - offset;
                for src in from..from + match_len {
                    let byte = out[src];
                    out.push(byte);
                }
            }
        });
    }
    out
}
6808
// Round-trips the same input through three search/parse override combinations
// and checks that each one reproduces the input exactly.
#[test]
fn parse_search_matrix_decoupled_roundtrips() {
    use super::strategy::{ParseMode, SearchMethod};

    // 4000 records: a fixed phrase followed by a little-endian counter.
    let mut data: Vec<u8> = Vec::new();
    for counter in 0..4000u32 {
        data.extend_from_slice(b"the quick brown fox ");
        data.extend_from_slice(&counter.to_le_bytes());
    }

    let cases = [
        (
            CompressionLevel::Level(5),
            SearchMethod::HashChain,
            ParseMode::Greedy,
            "greedy-on-hashchain diverged",
        ),
        (
            CompressionLevel::Level(8),
            SearchMethod::RowHash,
            ParseMode::Lazy2,
            "lazy2-on-rowhash diverged",
        ),
        (
            CompressionLevel::Level(6),
            SearchMethod::RowHash,
            ParseMode::Lazy,
            "lazy-on-rowhash diverged",
        ),
    ];

    for (level, search, parse, failure) in cases {
        let got = drive_roundtrip_with_override(level, Some((search, parse)), &data);
        assert_eq!(got, data, "{failure}");
    }
}
6849
// For each `mls` setting in 4..=7, the row matcher must round-trip the input
// and must never emit a match shorter than `mls`.
#[test]
fn row_mls_knob_gates_matches_and_roundtrips() {
    // 4000 records: "abcdefgh" followed by a little-endian counter.
    let mut data: Vec<u8> = Vec::new();
    for counter in 0..4000u32 {
        data.extend_from_slice(b"abcdefgh");
        data.extend_from_slice(&counter.to_le_bytes());
    }

    for mls in [4usize, 5, 6, 7] {
        let mut cfg = ROW_CONFIG;
        cfg.mls = mls;

        let mut matcher = RowMatchGenerator::new(1 << 22);
        matcher.configure(cfg);
        matcher.add_data(data.clone(), |_| {});

        let mut out: Vec<u8> = Vec::with_capacity(data.len());
        // `usize::MAX` doubles as the "no match seen" marker.
        let mut shortest_match = usize::MAX;
        matcher.start_matching(|seq| match seq {
            Sequence::Literals { literals } => out.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                out.extend_from_slice(literals);
                if match_len < shortest_match {
                    shortest_match = match_len;
                }
                let from = out.len() - offset;
                for src in from..from + match_len {
                    let byte = out[src];
                    out.push(byte);
                }
            }
        });

        assert_eq!(out, data, "mls={mls} round-trip diverged");
        assert!(
            shortest_match == usize::MAX || shortest_match >= mls,
            "mls={mls}: emitted a {shortest_match}-byte match below the floor",
        );
    }
}
6899
// Starting from `LEVEL_TABLE[15]`, `parse()` must first report `Optimal`, and
// after the search method is switched to `RowHash` it must follow `lazy_depth`.
#[test]
fn parse_mode_follows_search_axis_not_strategy_tag() {
    use super::strategy::{ParseMode, SearchMethod};

    let mut params = LEVEL_TABLE[15];
    assert_eq!(
        params.parse(),
        ParseMode::Optimal,
        "BinaryTree search → Optimal"
    );

    params.search = SearchMethod::RowHash;
    let row_hash_cases = [
        (0, ParseMode::Greedy, "RowHash + depth 0 → Greedy"),
        (2, ParseMode::Lazy2, "RowHash + depth 2 → Lazy2"),
    ];
    for (depth, expected, why) in row_hash_cases {
        params.lazy_depth = depth;
        assert_eq!(params.parse(), expected, "{why}");
    }
}
6919
// An installed override must be present before `reset` and gone afterwards.
#[test]
fn config_override_is_consumed_by_reset() {
    use super::strategy::{ParseMode, SearchMethod};

    let mut driver = MatchGeneratorDriver::new(1 << 17, 8);

    driver.set_config_override(SearchMethod::RowHash, ParseMode::Lazy2);
    let pending_before_reset = driver.config_override.is_some();
    assert!(pending_before_reset);

    driver.reset(CompressionLevel::Level(5));
    let pending_after_reset = driver.config_override.is_some();
    assert!(
        !pending_after_reset,
        "override must be consumed after one reset",
    );
}
6936
// Drives `data` through a level-4 driver slice by slice, asserts that replaying
// the emitted sequences reproduces `data`, and returns
// `(number of Triple sequences, largest offset seen)`.
#[cfg(test)]
fn l4_greedy_round_trip(slice_size: usize, max_slices: usize, data: &[u8]) -> (usize, usize) {
    let mut driver = MatchGeneratorDriver::new(slice_size, max_slices);
    driver.reset(CompressionLevel::Level(4));

    let mut reconstructed: Vec<u8> = Vec::with_capacity(data.len());
    let mut triple_count = 0usize;
    let mut max_offset = 0usize;

    let mut unread = data;
    while !unread.is_empty() {
        // Fill the next slice with as much of the remaining input as fits.
        let mut slice = driver.get_next_space();
        let take = unread.len().min(slice.len());
        let (chunk, rest) = unread.split_at(take);
        slice[..take].copy_from_slice(chunk);
        slice.truncate(take);
        driver.commit_space(slice);
        unread = rest;

        driver.start_matching(|seq| match seq {
            Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                triple_count += 1;
                max_offset = max_offset.max(offset);
                reconstructed.extend_from_slice(literals);
                // Byte-wise copy: the match may overlap its own output.
                let from = reconstructed.len() - offset;
                for src in from..from + match_len {
                    let byte = reconstructed[src];
                    reconstructed.push(byte);
                }
            }
        });
    }

    // The loop above never runs for empty input, so exercise the driver once
    // with a zero-length slice; no match may come out of it.
    if data.is_empty() {
        let mut slice = driver.get_next_space();
        slice.truncate(0);
        driver.commit_space(slice);
        driver.start_matching(|seq| match seq {
            Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
            Sequence::Triple { .. } => panic!("empty input must not emit any matches"),
        });
    }

    assert_eq!(reconstructed, data, "L4 greedy round-trip diverged");
    (triple_count, max_offset)
}
7001
// Commits a 16-byte "ABCD" x 4 slice followed by a 5-byte "ABCDA" slice at
// level 5 and requires at least one match to be emitted for the second slice.
#[test]
fn driver_level5_greedy_tail_rep_only_reachable() {
    // Copies `payload` into a fresh slice and commits it to the driver.
    fn commit(driver: &mut MatchGeneratorDriver, payload: &[u8]) {
        let mut slice = driver.get_next_space();
        slice[..payload.len()].copy_from_slice(payload);
        slice.truncate(payload.len());
        driver.commit_space(slice);
    }

    let first: &[u8] = b"ABCDABCDABCDABCD";
    let second: &[u8] = b"ABCDA";

    let mut driver = MatchGeneratorDriver::new(16, 2);
    driver.reset(CompressionLevel::Level(5));

    // First slice: sequences are discarded, only the history matters.
    commit(&mut driver, first);
    driver.start_matching(|_| {});

    // Second slice: count the matches.
    commit(&mut driver, second);
    let mut second_slice_triples = 0usize;
    driver.start_matching(|seq| match seq {
        Sequence::Triple { .. } => second_slice_triples += 1,
        Sequence::Literals { .. } => {}
    });

    assert!(
        second_slice_triples >= 1,
        "tail rep-only position must produce a match in the second slice \
         (got {second_slice_triples} triples)",
    );
}
7055
// Committing a zero-length slice at level 4 must not produce any sequence.
#[test]
fn driver_level4_greedy_empty_input_emits_nothing() {
    let mut driver = MatchGeneratorDriver::new(64, 2);
    driver.reset(CompressionLevel::Level(4));

    let mut empty_slice = driver.get_next_space();
    empty_slice.truncate(0);
    driver.commit_space(empty_slice);

    let mut emitted = 0usize;
    driver.start_matching(|_| emitted += 1);
    assert!(emitted == 0, "empty slice must not emit any sequences",);
}
7074
// A four-byte input must round-trip without producing any match.
#[test]
fn driver_level4_greedy_sub_min_lookahead_input() {
    let (triples, _max_offset) = l4_greedy_round_trip(64, 2, b"abcd");
    assert_eq!(
        triples, 0,
        "sub-min-lookahead input must not emit any matches (got {triples})",
    );
}
7087
// Round-trips 256 pseudo-random bytes in 64-byte slices. Only the round-trip
// assertion inside `l4_greedy_round_trip` is checked; the counters are ignored.
#[test]
fn driver_level4_greedy_incompressible_input() {
    // Linear congruential generator; bits 16..24 of each state become one byte.
    let mut state: u32 = 0xDEAD_BEEF;
    let data: Vec<u8> = (0..256)
        .map(|_| {
            state = state.wrapping_mul(1_103_515_245).wrapping_add(12345);
            (state >> 16) as u8
        })
        .collect();

    let _ = l4_greedy_round_trip(64, 8, &data);
}
7104
// Round-trips 2048 pseudo-random bytes in 512-byte slices. Only the round-trip
// assertion inside `l4_greedy_round_trip` is checked; the counters are ignored.
#[test]
fn driver_level4_greedy_long_literal_run_skip_step_growth() {
    // Multiplicative generator; the top byte of each state becomes one byte.
    let mut state: u32 = 0xC0FF_EE00;
    let data: Vec<u8> = (0..2048)
        .map(|_| {
            state = state.wrapping_mul(0x9E37_79B9).wrapping_add(0xCAFEBABE);
            (state >> 24) as u8
        })
        .collect();

    let _ = l4_greedy_round_trip(512, 8, &data);
}
7129
// 128 zero bytes in 64-byte slices must yield at least one match, and the
// largest offset observed must be exactly 1.
#[test]
fn driver_level4_greedy_all_zeros_heavy_rep1() {
    let zeros = [0u8; 128];
    let (triples, max_offset) = l4_greedy_round_trip(64, 8, &zeros);

    assert!(
        triples >= 1,
        "all-zeros input must produce at least one rep1 match",
    );
    assert_eq!(
        max_offset, 1,
        "all-zeros L4 greedy parse should commit at offset 1 (got {max_offset})",
    );
}
7150
// A 16-byte unit repeated 32 times must yield matches, at least one of them
// with an offset of 16 or more.
#[test]
fn driver_level4_greedy_periodic_pattern_rep_cascade() {
    let unit: &[u8] = b"alpha_beta_gamma";
    assert_eq!(unit.len(), 16);

    let data: Vec<u8> = unit
        .iter()
        .copied()
        .cycle()
        .take(unit.len() * 32)
        .collect();

    let (triples, max_offset) = l4_greedy_round_trip(64, 16, &data);
    assert!(
        triples >= 1,
        "periodic 16-byte payload must emit matches (got {triples})",
    );
    assert!(
        max_offset >= 16,
        "periodic 16-byte payload must produce at least one offset >= 16 \
         (got max_offset = {max_offset})",
    );
}
7175
// For a list of compression levels, `reset` must set `strategy_tag` to the
// expected tag, and that tag's backend must equal `active_backend()`.
#[test]
fn driver_reset_keeps_strategy_tag_in_sync_with_active_backend() {
    use super::strategy::StrategyTag;

    // Resets a fresh driver to `level` and checks both invariants.
    fn check(level: CompressionLevel, expected: StrategyTag) {
        let mut driver = MatchGeneratorDriver::new(32, 2);
        driver.reset(level);
        assert_eq!(
            driver.strategy_tag, expected,
            "strategy_tag wrong for {level:?}"
        );
        assert_eq!(
            driver.strategy_tag.backend(),
            driver.active_backend(),
            "strategy_tag backend disagrees with active_backend for {level:?}"
        );
    }

    let expectations = [
        // Numeric levels.
        (CompressionLevel::Level(1), StrategyTag::Fast),
        (CompressionLevel::Level(2), StrategyTag::Fast),
        (CompressionLevel::Level(3), StrategyTag::Dfast),
        (CompressionLevel::Level(4), StrategyTag::Dfast),
        (CompressionLevel::Level(5), StrategyTag::Greedy),
        (CompressionLevel::Level(7), StrategyTag::Lazy),
        (CompressionLevel::Level(12), StrategyTag::Lazy),
        (CompressionLevel::Level(13), StrategyTag::Btlazy2),
        (CompressionLevel::Level(14), StrategyTag::Btlazy2),
        (CompressionLevel::Level(15), StrategyTag::Btlazy2),
        (CompressionLevel::Level(16), StrategyTag::BtOpt),
        (CompressionLevel::Level(18), StrategyTag::BtUltra),
        (CompressionLevel::Level(22), StrategyTag::BtUltra2),
        // Named levels.
        (CompressionLevel::Fastest, StrategyTag::Fast),
        (CompressionLevel::Default, StrategyTag::Dfast),
        (CompressionLevel::Better, StrategyTag::Lazy),
        (CompressionLevel::Best, StrategyTag::Btlazy2),
    ];

    for (level, expected) in expectations {
        check(level, expected);
    }
}
7213
// Levels 16 and 17 must resolve to the HashChain backend and map to `BtOpt`.
#[test]
fn level_16_17_map_to_btopt_strategy() {
    use super::strategy::{BackendTag, StrategyTag};

    let resolved = [16, 17].map(|level| resolve_level_params(CompressionLevel::Level(level), None));
    for params in &resolved {
        assert_eq!(params.backend(), BackendTag::HashChain);
    }

    for level in [16, 17] {
        assert_eq!(StrategyTag::for_level(level), StrategyTag::BtOpt);
    }
}
7224
// Levels 18 and 19 must both resolve to the HashChain backend; level 18 maps to
// `BtUltra` and level 19 to `BtUltra2`.
#[test]
fn level_18_maps_to_btultra_level_19_to_btultra2_strategy() {
    use super::strategy::{BackendTag, StrategyTag};

    let [params_18, params_19] =
        [18, 19].map(|level| resolve_level_params(CompressionLevel::Level(level), None));
    assert_eq!(params_18.backend(), BackendTag::HashChain);
    assert_eq!(params_19.backend(), BackendTag::HashChain);

    let tag_18 = StrategyTag::for_level(18);
    assert_eq!(tag_18, StrategyTag::BtUltra);
    let tag_19 = StrategyTag::for_level(19);
    assert_eq!(tag_19, StrategyTag::BtUltra2);
}
7239
/// Levels 20 through 22 must all resolve to the hash-chain backend and the
/// `BtUltra2` strategy tag.
#[test]
fn level_20_22_map_to_btultra2_strategy() {
    use super::strategy::{BackendTag, StrategyTag};

    (20i32..=22).for_each(|level| {
        let resolved = resolve_level_params(CompressionLevel::Level(level), None);
        assert_eq!(resolved.backend(), BackendTag::HashChain);
        assert_eq!(StrategyTag::for_level(level as u8), StrategyTag::BtUltra2);
    });
}
7249
/// Without a source-size hint, level 22 must resolve to window log 27 and the
/// hash-chain parameters pinned below.
#[test]
fn level22_uses_donor_target_length_and_large_input_tables() {
    let resolved = resolve_level_params(CompressionLevel::Level(22), None);
    assert_eq!(resolved.window_log, 27);

    let chain_cfg = resolved.hc.unwrap();
    assert_eq!(chain_cfg.hash_log, 25);
    assert_eq!(chain_cfg.chain_log, 27);
    assert_eq!(chain_cfg.search_depth, 1 << 9);
    assert_eq!(chain_cfg.target_len, 999);
}
7260
/// Pins the parameters resolved for levels 16 through 21 (no size hint)
/// against a fixed table.
#[test]
fn bt_levels_16_to_21_pin_clevels_params() {
    // Columns: level, window_log, hash_log, chain_log, search_depth, target_len.
    // NOTE(review): the level-21 row does not look like the upstream
    // `clevels.h` entry as I remember it — confirm the deviation is intended.
    const PINNED: [(u8, u8, usize, usize, usize, usize); 6] = [
        (16, 22, 22, 22, 32, 48),
        (17, 23, 22, 23, 32, 64),
        (18, 23, 22, 23, 64, 64),
        (19, 23, 22, 24, 128, 256),
        (20, 25, 23, 25, 128, 256),
        (21, 26, 24, 24, 512, 256),
    ];

    for &(level, window_log, hash_log, chain_log, search_depth, target_len) in &PINNED {
        let resolved = resolve_level_params(CompressionLevel::Level(i32::from(level)), None);
        assert_eq!(resolved.window_log, window_log, "level {level} window_log");

        let chain_cfg = resolved.hc.unwrap();
        assert_eq!(chain_cfg.hash_log, hash_log, "level {level} hash_log");
        assert_eq!(chain_cfg.chain_log, chain_log, "level {level} chain_log");
        assert_eq!(
            chain_cfg.search_depth, search_depth,
            "level {level} search_depth"
        );
        assert_eq!(chain_cfg.target_len, target_len, "level {level} target_len");
    }
}
7288
/// Level 22 with 16 KiB, 128 KiB and 256 KiB source-size hints must resolve to
/// the per-tier parameters listed below; `target_len` stays 999 in every tier.
#[test]
fn level22_source_size_hint_uses_donor_btultra2_tiers() {
    // Columns: source-size hint, window_log, hash_log, chain_log, search_depth.
    let tiers = [
        (16 * 1024, 14, 15, 15, 1 << 10),
        (128 * 1024, 17, 17, 18, 1 << 11),
        (256 * 1024, 18, 19, 19, 1 << 13),
    ];

    for (hint, window_log, hash_log, chain_log, search_depth) in tiers {
        let resolved = resolve_level_params(CompressionLevel::Level(22), Some(hint));
        assert_eq!(resolved.window_log, window_log);

        let chain_cfg = resolved.hc.unwrap();
        assert_eq!(chain_cfg.hash_log, hash_log);
        assert_eq!(chain_cfg.chain_log, chain_log);
        assert_eq!(chain_cfg.search_depth, search_depth);
        assert_eq!(chain_cfg.target_len, 999);
    }
}
7315
/// Cross-checks the level-22 parameters resolved for a 15 027-byte source
/// against what `ZSTD_getCParams` from `zstd_sys` reports for the same input.
#[test]
fn level22_small_source_size_hint_matches_donor_cparams() {
    use zstd::zstd_safe::zstd_sys;

    let source_size = 15_027u64;
    let resolved = resolve_level_params(CompressionLevel::Level(22), Some(source_size));
    // SAFETY: the call passes only by-value integers (level, source size,
    // dictionary size 0); presumably there are no further preconditions.
    let donor = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };

    let chain_cfg = resolved.hc.unwrap();
    assert_eq!(resolved.window_log as u32, donor.windowLog);
    assert_eq!(chain_cfg.chain_log as u32, donor.chainLog);
    assert_eq!(chain_cfg.hash_log as u32, donor.hashLog);
    // The donor reports a log2 search budget; ours is stored as a count.
    assert_eq!(chain_cfg.search_depth as u32, 1u32 << donor.searchLog);
    assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, donor.minMatch);
    assert_eq!(chain_cfg.target_len as u32, donor.targetLength);
}
7332
/// Reconfiguring the generator with the 16K level-22 config and window log 14
/// must yield `hash3_log == 14`; the default level-22 config with window log 27
/// must yield `HC3_HASH_LOG`.
#[test]
fn level22_small_source_uses_window_bounded_hash3_log() {
    use super::strategy::StrategyTag;

    let mut generator = HcMatchGenerator::new(1 << 14);
    // Columns: config, window log, expected hash3 log.
    let cases = [
        (BTULTRA2_HC_CONFIG_L22_16K, 14, 14),
        (BTULTRA2_HC_CONFIG_L22, 27, HC3_HASH_LOG),
    ];
    for (config, window_log, expected_hash3_log) in cases {
        generator.configure(config, StrategyTag::BtUltra2, window_log);
        assert_eq!(generator.table.hash3_log, expected_hash3_log);
    }
}
7350
/// After matching a first 32 KiB block under BtUltra2, the optimal-parser
/// statistics must hold non-zero literal-length and offset-code sums.
#[test]
fn btultra2_seed_pass_initializes_opt_state() {
    use super::strategy::StrategyTag;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    // 32 KiB of a repeating 0..=250 byte ramp.
    let payload: Vec<u8> = (0u8..251).cycle().take(32 * 1024).collect();
    generator.table.add_data(payload, |_| {});
    generator.start_matching(|_| {});

    let bt = generator.backend.bt_mut();
    assert!(
        bt.opt_state.lit_length_sum > 0,
        "btultra2 first block should seed non-zero sequence statistics"
    );
    assert!(
        bt.opt_state.off_code_sum > 0,
        "btultra2 first block should seed offset-code statistics"
    );
}
7371
/// The BtUltra2 cost profile must have `favor_small_offsets` off and
/// `accurate` on.
#[test]
fn btultra2_profile_disables_small_offset_handicap() {
    use super::strategy::BtUltra2;

    let HcOptimalCostProfile {
        favor_small_offsets,
        accurate,
        ..
    } = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();

    assert!(
        !favor_small_offsets,
        "btultra2 should match donor opt2 offset pricing"
    );
    assert!(accurate, "btultra2 should use donor opt2 accurate pricing");
}
7389
/// The BtUltra cost profile must carry a chain-depth budget of 64.
#[test]
fn btultra_profile_keeps_donor_search_depth_budget() {
    use super::strategy::BtUltra;

    let budget = HcOptimalCostProfile::const_for_strategy::<BtUltra>().max_chain_depth;
    assert_eq!(
        budget, 64,
        "btultra chain-depth budget must match clevels.h level 18 searchLog 6 (1 << 6 = 64)"
    );
}
7398
/// The BtOpt cost profile must carry a chain-depth budget of 32.
#[test]
fn btopt_profile_keeps_donor_search_depth_budget() {
    use super::strategy::BtOpt;

    let budget = HcOptimalCostProfile::const_for_strategy::<BtOpt>().max_chain_depth;
    assert_eq!(
        budget, 32,
        "btopt should not cap chain depth below donor btopt search budget"
    );
}
7407
/// With `target_len` forced to 13, the sufficient match length for a BtUltra2
/// pass must also be 13.
#[test]
fn sufficient_match_len_is_clamped_by_target_len() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);
    generator.hc.target_len = 13;

    let sufficient = generator
        .hc
        .sufficient_match_len_for_pass(HcOptimalCostProfile::const_for_strategy::<BtUltra2>());
    assert_eq!(sufficient, 13);
}
7420
/// For the BtOpt, BtUltra and BtUltra2 profiles alike, a `target_len` of 57
/// must be reported as the sufficient match length.
#[test]
fn opt_modes_use_target_len_as_sufficient_len() {
    use super::strategy::{BtOpt, BtUltra, BtUltra2};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.hc.target_len = 57;

    [
        HcOptimalCostProfile::const_for_strategy::<BtOpt>(),
        HcOptimalCostProfile::const_for_strategy::<BtUltra>(),
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    ]
    .into_iter()
    .for_each(|profile| {
        assert_eq!(generator.hc.sufficient_match_len_for_pass(profile), 57);
    });
}
7435
/// A very large `target_len` must be capped at `HC_OPT_NUM - 1`.
#[test]
fn sufficient_match_len_is_capped_by_opt_num() {
    use super::strategy::BtUltra2;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.hc.target_len = usize::MAX / 2;

    let sufficient = generator
        .hc
        .sufficient_match_len_for_pass(HcOptimalCostProfile::const_for_strategy::<BtUltra2>());
    assert_eq!(sufficient, HC_OPT_NUM - 1);
}
7443
/// Seeding Huffman plus LL/ML/OF tables and then rescaling must leave the
/// frequency tables different from the fallback values this test pins.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
    use super::strategy::{BtUltra2, StrategyTag};
    use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table};
    use crate::huff0::huff0_encoder::HuffmanTable;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let huff = HuffmanTable::build_from_data(b"aaabbbbccccddddeeeeefffffgggg");
    let ll = default_ll_table();
    let ml = default_ml_table();
    let of = default_of_table();
    generator.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));

    generator.backend.bt_mut().opt_state.rescale_freqs(
        b"abcd",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    // Fallback LL table the seeded state must differ from: 4, 2, then all ones.
    let mut fallback_ll = [1u32; HC_MAX_LL + 1];
    fallback_ll[0] = 4;
    fallback_ll[1] = 2;

    let bt = generator.backend.bt_mut();
    assert_ne!(
        bt.opt_state.lit_length_freq, fallback_ll,
        "dictionary entropy should override fallback LL bootstrap frequencies"
    );
    assert!(
        bt.opt_state.match_length_freq.iter().any(|&v| v != 1),
        "dictionary entropy should seed non-uniform ML frequencies"
    );
    assert_ne!(
        bt.opt_state.off_code_freq[0], 6,
        "dictionary entropy should override fallback OF bootstrap frequencies"
    );
}
7492
/// Seeding only the LL/ML/OF tables (no Huffman table) and then rescaling must
/// still leave the frequency tables different from the pinned fallback values.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_fse_seed_applies_without_huffman_seed() {
    use super::strategy::{BtUltra2, StrategyTag};
    use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = default_ll_table();
    let ml = default_ml_table();
    let of = default_of_table();
    generator.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
    generator.backend.bt_mut().opt_state.rescale_freqs(
        b"abcd",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    // Fallback LL table the seeded state must differ from: 4, 2, then all ones.
    let mut fallback_ll = [1u32; HC_MAX_LL + 1];
    fallback_ll[0] = 4;
    fallback_ll[1] = 2;

    let bt = generator.backend.bt_mut();
    assert_ne!(
        bt.opt_state.lit_length_freq, fallback_ll,
        "FSE seed should still override LL bootstrap frequencies without huffman seed"
    );
    assert!(
        bt.opt_state.match_length_freq.iter().any(|&v| v != 1),
        "FSE seed should still seed non-uniform ML frequencies"
    );
    assert_ne!(
        bt.opt_state.off_code_freq[0], 6,
        "FSE seed should still override OF bootstrap frequencies without huffman seed"
    );
}
7536
/// After an FSE-table seed, rescaling on a 3-byte input must leave the price
/// type at `Dynamic`.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
    use super::strategy::{BtUltra2, StrategyTag};
    use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = default_ll_table();
    let ml = default_ml_table();
    let of = default_of_table();
    generator.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
    generator.backend.bt_mut().opt_state.rescale_freqs(
        b"abc",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    let stays_dynamic = matches!(
        generator.backend.bt_mut().opt_state.price_type,
        HcOptPriceType::Dynamic
    );
    assert!(
        stays_dynamic,
        "dictionary-seeded first block should stay in dynamic mode even for tiny src"
    );
}
7563
/// In both predefined and dynamic price modes, the literal-length price at
/// `HC_BLOCKSIZE_MAX` must exceed the price one below it by exactly
/// `HC_BITCOST_MULTIPLIER`.
#[test]
fn lit_length_price_blocksize_max_costs_one_extra_bit() {
    use super::strategy::BtUltra2;

    let below_max = HC_BLOCKSIZE_MAX.saturating_sub(1);

    // Predefined pricing.
    let predef_profile = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let mut predef_stats = HcOptState::new();
    predef_stats.price_type = HcOptPriceType::Predefined;
    let predef_at_max = predef_profile.lit_length_price(&predef_stats, HC_BLOCKSIZE_MAX);
    let predef_below = predef_profile.lit_length_price(&predef_stats, below_max);
    assert_eq!(
        predef_at_max,
        predef_below + HC_BITCOST_MULTIPLIER,
        "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
    );

    // Dynamic pricing over uniform (all-ones) frequency tables.
    let dyn_profile = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let mut dyn_stats = HcOptState::new();
    dyn_stats.price_type = HcOptPriceType::Dynamic;
    dyn_stats.lit_length_freq.fill(1);
    dyn_stats.match_length_freq.fill(1);
    dyn_stats.off_code_freq.fill(1);
    dyn_stats.lit_freq.fill(1);
    dyn_stats.lit_length_sum = (HC_MAX_LL + 1) as u32;
    dyn_stats.match_length_sum = (HC_MAX_ML + 1) as u32;
    dyn_stats.off_code_sum = (HC_MAX_OFF + 1) as u32;
    dyn_stats.lit_sum = (HC_MAX_LIT + 1) as u32;
    dyn_stats.set_base_prices(true);
    let dyn_at_max = dyn_profile.lit_length_price(&dyn_stats, HC_BLOCKSIZE_MAX);
    let dyn_below = dyn_profile.lit_length_price(&dyn_stats, below_max);
    assert_eq!(
        dyn_at_max,
        dyn_below + HC_BITCOST_MULTIPLIER,
        "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
    );
}
7598
/// Once dictionary entropy tables have been seeded, the BtUltra2 seed pass
/// must not be requested.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
    use super::strategy::{BtUltra2, StrategyTag};
    use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = default_ll_table();
    let ml = default_ml_table();
    let of = default_of_table();
    generator.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));

    let runs_seed_pass =
        generator.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !runs_seed_pass,
        "dictionary-seeded first block should skip btultra2 warmup pass"
    );
}
7617
/// With a non-zero history start and a chunk already pushed, the BtUltra2
/// seed pass must not be requested.
#[test]
fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);
    generator.table.history_abs_start = 17;
    generator
        .table
        .push_test_chunk(b"abcdefghijklmnop".to_vec());

    let runs_seed_pass =
        generator.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 9);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must be first-block only (no prefix history)"
    );
}
7633
/// A block of exactly `HC_PREDEF_THRESHOLD` bytes must not trigger the
/// BtUltra2 seed pass.
#[test]
fn btultra2_seed_pass_disabled_for_tiny_block() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let runs_seed_pass = generator.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup should not run at or below predefined threshold"
    );
}
7647
/// A non-zero `lit_length_sum` in the optimal-parser state must suppress the
/// BtUltra2 seed pass.
#[test]
fn btultra2_seed_pass_disabled_after_stats_initialized() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);
    generator.backend.bt_mut().opt_state.lit_length_sum = 1;

    let runs_seed_pass =
        generator.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 32);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup should run only for first block before stats are initialized"
    );
}
7662
/// With a chunk length already recorded in the table, the BtUltra2 seed pass
/// must not be requested for a later block.
#[test]
fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
    use super::strategy::{BtUltra2, StrategyTag};

    let block_len = HC_PREDEF_THRESHOLD + 32;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);
    generator.table.window_size = HC_PREDEF_THRESHOLD + 64;
    generator.table.chunk_lens.push_back(block_len);

    let runs_seed_pass = generator.should_run_btultra2_seed_pass::<BtUltra2>(block_len);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must not run after frame start"
    );
}
7682
/// A pending LDM sequence in the BT backend must suppress the BtUltra2 seed
/// pass.
#[test]
fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
    use super::strategy::{BtUltra2, StrategyTag};

    let window = HC_PREDEF_THRESHOLD + 64;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);
    generator.table.window_size = window;
    generator.table.chunk_lens.push_back(window);

    let pending = HcRawSeq {
        lit_length: 8,
        offset: 16,
        match_length: 32,
    };
    generator.backend.bt_mut().ldm_sequences.push(pending);

    let runs_seed_pass =
        generator.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 32);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must not run when LDM already produced sequences"
    );
}
7703
/// With literal compression switched off, a literal must be priced at eight
/// bits even in predefined price mode.
#[test]
fn literal_price_uses_eight_bits_when_literals_uncompressed() {
    use super::strategy::BtUltra2;

    let mut stats = HcOptState::new();
    stats.set_literals_compressed_for_tests(false);
    stats.price_type = HcOptPriceType::Predefined;

    let price = HcOptimalCostProfile::const_for_strategy::<BtUltra2>().literal_price(&stats, b'a');
    assert_eq!(
        price,
        8 * HC_BITCOST_MULTIPLIER,
        "uncompressed literals should cost 8 bits regardless of price mode"
    );
}
7716
/// With literal compression off, `update_stats` must leave the literal
/// counters at zero while still counting one LL, ML and OF symbol.
#[test]
fn update_stats_skips_literal_frequencies_when_uncompressed() {
    let mut stats = HcOptState::new();
    stats.set_literals_compressed_for_tests(false);
    stats.update_stats(3, b"abc", 4, 8);

    // Literal statistics stay untouched.
    assert_eq!(
        stats.lit_sum, 0,
        "literal sum must remain unchanged when literal compression is disabled"
    );
    let literal_total = stats.lit_freq.iter().sum::<u32>();
    assert_eq!(
        literal_total, 0,
        "literal frequencies must not be updated when literal compression is disabled"
    );

    // Sequence statistics each advance by one.
    assert_eq!(
        stats.lit_length_sum, 1,
        "literal-length stats still update for sequence modeling"
    );
    assert_eq!(
        stats.match_length_sum, 1,
        "match-length stats still update for sequence modeling"
    );
    assert_eq!(
        stats.off_code_sum, 1,
        "offset-code stats still update for sequence modeling"
    );
}
7744
/// With literal compression off, seeding a Huffman table and rescaling must
/// leave the literal counters at zero.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
    use super::strategy::BtUltra2;
    use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table};
    use crate::huff0::huff0_encoder::HuffmanTable;

    let mut stats = HcOptState::new();
    stats.set_literals_compressed_for_tests(false);

    let huff = HuffmanTable::build_from_data(b"aaaaabbbbcccddeeff00112233445566778899");
    let ll = default_ll_table();
    let ml = default_ml_table();
    let of = default_of_table();
    stats.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
    stats.rescale_freqs(
        b"abcd",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    assert_eq!(
        stats.lit_sum, 0,
        "literal sum must stay zero when literals are uncompressed"
    );
    let literal_total = stats.lit_freq.iter().sum::<u32>();
    assert_eq!(
        literal_total, 0,
        "literal frequencies must ignore dictionary huffman seed when uncompressed"
    );
}
7771
/// Offset 6 must be reported as a repcode candidate when `lit_len` is 1 and
/// must not be reported when `lit_len` is 0.
#[test]
fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
    let mut generator = HcMatchGenerator::new(64);
    generator.table.history = b"xxxxxxABCDEFABCDEF".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;

    // Position 12 starts the second "ABCDEF", six bytes after the first.
    let abs_pos = 12usize;
    let current_abs_end = generator.table.history.len();
    let reps = [6u32, 3u32, 9u32];

    // lit_len == 1
    let mut offsets_with_literals = Vec::new();
    generator.hc.for_each_repcode_candidate_with_reps(
        &generator.table,
        abs_pos,
        1,
        reps,
        current_abs_end,
        HC_OPT_MIN_MATCH_LEN,
        |candidate| offsets_with_literals.push(candidate.offset),
    );
    assert!(
        offsets_with_literals.contains(&6),
        "when lit_len>0, rep0 should be considered and match"
    );

    // lit_len == 0
    let mut offsets_without_literals = Vec::new();
    generator.hc.for_each_repcode_candidate_with_reps(
        &generator.table,
        abs_pos,
        0,
        reps,
        current_abs_end,
        HC_OPT_MIN_MATCH_LEN,
        |candidate| offsets_without_literals.push(candidate.offset),
    );
    assert!(
        !offsets_without_literals.contains(&6),
        "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
    );
}
7817
/// With search depth and chain-depth budget both zero, candidate collection
/// must still return something, including a candidate at offset 3.
#[test]
fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
    let mut generator = HcMatchGenerator::new(64);
    generator.table.history = b"xyzxyzxyzxyz".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.hc.search_depth = 0;

    let no_chain_profile = HcOptimalCostProfile {
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
        accurate: false,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [3, 6, 9],
        lit_len: 1,
        ldm_candidate: None,
    };
    let abs_pos = 6usize;
    let current_abs_end = generator.table.history.len();

    let mut candidates = Vec::new();
    generator.collect_optimal_candidates(
        abs_pos,
        current_abs_end,
        no_chain_profile,
        query,
        &mut candidates,
    );

    assert!(
        !candidates.is_empty(),
        "rep candidates should remain available even when chain depth is zero"
    );
    assert!(
        candidates.iter().any(|c| c.offset == 3),
        "rep0 candidate should be retained"
    );
}
7855
/// On an all-`a` history, every collected candidate must have offset 1 or 4.
#[test]
fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
    let abs_pos = 6usize;

    let mut generator = HcMatchGenerator::new(64);
    generator.table.history = b"aaaaaaaaaa".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.hc.search_depth = 32;
    generator.table.ensure_tables();
    generator.table.insert_positions(0, abs_pos);

    let profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };
    let current_abs_end = generator.table.history.len();

    let mut candidates = Vec::new();
    generator.collect_optimal_candidates(
        abs_pos,
        current_abs_end,
        profile,
        query,
        &mut candidates,
    );

    let only_rep_offsets = candidates
        .iter()
        .all(|candidate| matches!(candidate.offset, 1 | 4));
    assert!(
        only_rep_offsets,
        "terminal rep match should return before chain probing adds non-rep offsets"
    );
}
7893
/// On a periodic `abc` history, candidate collection at position 9 must push
/// `skip_insert_until_abs` beyond that position.
#[test]
fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
    let abs_pos = 9usize;

    let mut generator = HcMatchGenerator::new(128);
    generator.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.hc.search_depth = 32;
    generator.table.ensure_tables();
    generator.table.insert_positions(0, abs_pos);
    generator.table.skip_insert_until_abs = 0;

    let profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };
    let current_abs_end = generator.table.history.len();

    let mut candidates = Vec::new();
    generator.collect_optimal_candidates(
        abs_pos,
        current_abs_end,
        profile,
        query,
        &mut candidates,
    );

    assert!(
        generator.table.skip_insert_until_abs > abs_pos,
        "long chain match should advance skip window to avoid redundant immediate insertions"
    );
}
7931
/// With `sufficient_match_len` 10 on a periodic `abc` history, the skip window
/// must move past position 9 yet stay at or below the furthest candidate end
/// minus 8.
#[test]
fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
    let abs_pos = 9usize;

    let mut generator = HcMatchGenerator::new(128);
    generator.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.hc.search_depth = 32;
    generator.table.ensure_tables();
    generator.table.insert_positions(0, abs_pos);
    generator.table.skip_insert_until_abs = 0;

    let profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: 10,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };
    let current_abs_end = generator.table.history.len();

    let mut candidates = Vec::new();
    generator.collect_optimal_candidates(
        abs_pos,
        current_abs_end,
        profile,
        query,
        &mut candidates,
    );

    // Furthest end position reached by any collected candidate.
    let furthest_match_end = candidates
        .iter()
        .map(|candidate| candidate.start.saturating_add(candidate.match_len))
        .max()
        .expect("expected at least one candidate");
    let skip_until = generator.table.skip_insert_until_abs;

    assert!(
        skip_until > abs_pos,
        "chain fast-skip must advance past current position"
    );
    assert!(
        skip_until <= furthest_match_end.saturating_sub(8),
        "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
    );
}
7978
/// With zero search depth on a non-repeating history, candidate collection at
/// position 8 must set `skip_insert_until_abs` to exactly 9.
#[test]
fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
    let mut generator = HcMatchGenerator::new(256);
    generator.table.history = b"abcdefghijklmnop".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.hc.search_depth = 0;
    generator.table.ensure_tables();
    generator.table.skip_insert_until_abs = 0;

    let abs_pos = 8usize;
    let profile = HcOptimalCostProfile {
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };
    let current_abs_end = generator.table.history.len();

    let mut candidates = Vec::new();
    generator.collect_optimal_candidates(
        abs_pos,
        current_abs_end,
        profile,
        query,
        &mut candidates,
    );

    assert_eq!(
        generator.table.skip_insert_until_abs,
        abs_pos.saturating_add(1),
        "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
    );
}
8017
/// An LDM candidate supplied through the query must appear, with the same
/// offset and length, in the collected candidate set.
#[test]
fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
    let mut generator = HcMatchGenerator::new(512);
    // 256 bytes of a 0..=250 ramp that wraps once.
    generator.table.history = (0u8..251).cycle().take(256).collect();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;

    let abs_pos = 128usize;
    let current_abs_end = 256usize;
    let ldm = MatchCandidate {
        start: abs_pos,
        offset: 96,
        match_len: 40,
    };
    let profile = HcOptimalCostProfile {
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: Some(ldm),
    };

    let mut candidates = Vec::new();
    generator.collect_optimal_candidates(
        abs_pos,
        current_abs_end,
        profile,
        query,
        &mut candidates,
    );

    let ldm_present = candidates
        .iter()
        .any(|candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len);
    assert!(
        ldm_present,
        "LDM candidate should be present in optimal candidate set"
    );
}
8070
/// Both BtUltra2 and BtUltra must report at least one candidate with an
/// offset of 32 or more when the match source lies in the dictionary region
/// (first 64 bytes of history).
///
/// Each strategy runs on a freshly built generator, and the candidate buffer
/// is emptied between runs so the second assertion cannot be satisfied by
/// candidates left over from the first.
#[test]
fn btultra_and_btultra2_both_keep_dictionary_candidates() {
    use super::strategy::StrategyTag;

    let test_config = HcConfig {
        hash_log: 23,
        chain_log: 22,
        search_depth: 32,
        target_len: 256,
        search_mls: 4,
    };
    let window_log = 20u8;

    // Builds a 160-byte history: bytes 0..64 and 64..160 use different
    // alphabets, and 24 bytes from position 16 are copied to `abs_pos`.
    // Positions before `abs_pos` are inserted into the tables and the first
    // 64 bytes are marked as dictionary.
    let prepare_history = |hc: &mut HcMatchGenerator, abs_pos: usize| {
        hc.table.history = alloc::vec![0u8; 160];
        for i in 0..64 {
            hc.table.history[i] = b'a' + (i % 7) as u8;
        }
        for i in 64..160 {
            hc.table.history[i] = b'k' + (i % 5) as u8;
        }
        for i in 0..24 {
            hc.table.history[abs_pos + i] = hc.table.history[16 + i];
        }
        hc.table.history_start = 0;
        hc.table.history_abs_start = 0;
        hc.table.position_base = 0;
        hc.table.ensure_tables();
        hc.table.insert_positions(0, abs_pos);
        hc.table.dictionary_limit_abs = Some(64);
        hc.table.skip_insert_until_abs = 0;
    };

    let profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let abs_pos = 96usize;
    let mut out = Vec::new();

    let mut hc = HcMatchGenerator::new(256);
    hc.configure(test_config, StrategyTag::BtUltra2, window_log);
    prepare_history(&mut hc, abs_pos);
    hc.collect_optimal_candidates(
        abs_pos,
        160,
        profile,
        HcCandidateQuery {
            reps: [1, 4, 8],
            lit_len: 1,
            ldm_candidate: None,
        },
        &mut out,
    );
    assert!(
        out.iter().any(|candidate| candidate.offset >= 32),
        "btultra2 should retain dictionary candidates on donor-parity path"
    );

    // Drop the BtUltra2 results so the BtUltra check stands on its own.
    out.clear();

    let mut hc = HcMatchGenerator::new(256);
    hc.configure(test_config, StrategyTag::BtUltra, window_log);
    prepare_history(&mut hc, abs_pos);
    hc.collect_optimal_candidates(
        abs_pos,
        160,
        profile,
        HcCandidateQuery {
            reps: [1, 4, 8],
            lit_len: 1,
            ldm_candidate: None,
        },
        &mut out,
    );
    assert!(
        out.iter().any(|candidate| candidate.offset >= 32),
        "btultra should retain dictionary candidates"
    );
}
8158
/// At level 3, a 1 KiB source-size hint must shrink both dfast hash tables
/// compared with the unhinted run.
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    // Commits a 12-byte block and lets the driver skip over it.
    let feed = |driver: &mut MatchGeneratorDriver, payload: &[u8]| {
        let mut space = driver.get_next_space();
        space[..12].copy_from_slice(payload);
        space.truncate(12);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    };

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline: no size hint.
    driver.reset(CompressionLevel::Level(3));
    feed(&mut driver, b"abcabcabcabc");
    let full_long = driver.dfast_matcher().long_hash.len();
    let full_short = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_long, 1 << DFAST_HASH_BITS);
    assert_eq!(
        full_short,
        1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA)
    );

    // Same level with a 1 KiB hint.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(3));
    feed(&mut driver, b"xyzxyzxyzxyz");
    let hinted_long = driver.dfast_matcher().long_hash.len();
    let hinted_short = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_long, 1 << MIN_WINDOW_LOG);
    assert_eq!(hinted_short, 1 << MIN_WINDOW_LOG);
    assert!(
        hinted_long < full_long && hinted_short < full_short,
        "tiny source hint should reduce both dfast tables"
    );
}
8204
/// A `u64::MAX` source-size hint at level 3 must still leave the long dfast
/// table with at least `1 << MIN_WINDOW_LOG` entries.
#[test]
fn driver_huge_source_hint_does_not_overflow_table_window_shift() {
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.set_source_size_hint(u64::MAX);
    driver.reset(CompressionLevel::Level(3));

    // Commit one 12-byte block so the tables get sized.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let long_table_len = driver.dfast_matcher().long_hash.len();
    assert!(
        long_table_len >= 1 << MIN_WINDOW_LOG,
        "huge hint must size the dfast table from the real window, not wrap to zero"
    );
}
8227
/// With a `u64::MAX` source hint plus a 64 KiB dictionary hint at level 16,
/// the history buffer must reserve at least window + window/4 + one maximum
/// block, and committing a block afterwards must not panic.
#[test]
fn driver_huge_source_hint_with_dict_does_not_overflow_hc_reserve() {
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.set_source_size_hint(u64::MAX);
    driver.set_dictionary_size_hint(64 * 1024);
    driver.reset(CompressionLevel::Level(16));

    let window = 1usize << 22;
    let quarter_window = window >> 2;
    let expected_history_ceiling = window + quarter_window + crate::common::MAX_BLOCK_SIZE as usize;
    assert!(
        driver.hc_matcher().table.history.capacity() >= expected_history_ceiling,
        "huge source + dict hint must reserve the clamped HC history ceiling, got {}",
        driver.hc_matcher().table.history.capacity()
    );

    // Exercise the commit/skip path with the hinted configuration.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
}
8263
/// At level 5, a 1 KiB source-size hint must shrink the row matcher's head
/// table compared with the unhinted run.
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    // Commits a 12-byte block and lets the driver skip over it.
    let feed = |driver: &mut MatchGeneratorDriver, payload: &[u8]| {
        let mut space = driver.get_next_space();
        space[..12].copy_from_slice(payload);
        space.truncate(12);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    };

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline: no size hint.
    driver.reset(CompressionLevel::Level(5));
    feed(&mut driver, b"abcabcabcabc");
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_L5.hash_bits - ROW_L5.row_log));

    // Same level with a 1 KiB hint.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(5));
    feed(&mut driver, b"xyzxyzxyzxyz");
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_WINDOW_LOG as usize) + 1 - ROW_L5.row_log)
    );
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
8303
/// Feeds three blocks through the row matcher (two identical 128 KiB periodic
/// blocks, then the bytes 0..=255) and checks that replaying the emitted
/// sequences reproduces each block.
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    /// Applies one emitted sequence to `decoded`.
    fn replay(decoded: &mut Vec<u8>, seq: Sequence<'_>) {
        match seq {
            Sequence::Literals { literals } => decoded.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                decoded.extend_from_slice(literals);
                let start = decoded.len() - offset;
                // Copy byte by byte so the match may read bytes it just wrote.
                for src in start..start + match_len {
                    let byte = decoded[src];
                    decoded.push(byte);
                }
            }
        }
    }

    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let periodic_block =
        || -> Vec<u8> { pattern.iter().copied().cycle().take(128 * 1024).collect() };
    let first_block = periodic_block();
    let second_block = periodic_block();
    let third_block: Vec<u8> = (0u8..=255).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();

    let mut decoded = Vec::new();

    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut decoded, seq));
    assert_eq!(decoded, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let second_start = decoded.len();
    matcher.start_matching(|seq| replay(&mut decoded, seq));
    assert_eq!(&decoded[second_start..], second_block.as_slice());

    matcher.add_data(third_block.clone(), |_| {});
    let third_start = decoded.len();
    matcher.start_matching(|seq| replay(&mut decoded, seq));
    assert_eq!(&decoded[third_start..], third_block.as_slice());
}
8347
/// A 5-byte block must come back as literals only; a following block that repeats the
/// same bytes must produce at least one triple.
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut triple_seen = false;
    let mut literal_bytes = Vec::new();
    matcher.start_matching(|seq| {
        if let Sequence::Literals { literals } = seq {
            literal_bytes.extend_from_slice(literals);
        } else {
            triple_seen = true;
        }
    });

    assert!(
        !triple_seen,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(literal_bytes, b"abcde");

    // Second block repeats the first one twice over.
    triple_seen = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        triple_seen |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        triple_seen,
        "row backend should emit triples on repeated data"
    );
}
8381
/// With only six bytes of data loaded, `pick_lazy_match` must hand back the supplied
/// candidate unchanged.
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});
    matcher.ensure_tables();

    let current = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let kept = matcher
        .pick_lazy_match(0, 0, Some(current))
        .expect("best candidate must survive");

    // Field-by-field comparison of the returned candidate with the input.
    assert_eq!(kept.start, current.start);
    assert_eq!(kept.offset, current.offset);
    assert_eq!(kept.match_len, current.match_len);
}
8406
/// The first block ends in `XYZ`; the second starts with `XYZXYZ`. The row matcher must
/// emit a literal-free triple at offset 3 for the second block, and the replayed output
/// must still equal the input.
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Minimal decoder: append the literals, then copy `match_len` bytes starting
    // `offset` bytes back from the current end of the output.
    let replay = |out: &mut Vec<u8>, seq: Sequence<'_>| {
        let (literals, back_reference) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        out.extend_from_slice(literals);
        if let Some((offset, match_len)) = back_reference {
            let copy_from = out.len() - offset;
            for step in 0..match_len {
                let byte = out[copy_from + step];
                out.push(byte);
            }
        }
    };

    let mut reconstructed = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = reconstructed.len();
    let mut saw_cross_boundary = false;
    matcher.start_matching(|seq| {
        let reuses_tail = matches!(
            seq,
            Sequence::Triple { literals, offset, match_len }
                if literals.is_empty() && offset == 3 && match_len >= ROW_MIN_MATCH_LEN
        );
        if reuses_tail {
            saw_cross_boundary = true;
        }
        replay(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
8461
/// Compares how many row slots are filled after `skip_matching_with_hint(Some(false))`
/// versus `Some(true)` on identical high-entropy input; the `Some(true)` run must fill
/// strictly fewer.
#[test]
fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);

    // Run with the hint set to `Some(false)`.
    let mut dense = RowMatchGenerator::new(1 << 22);
    dense.configure(ROW_CONFIG);
    dense.add_data(data.clone(), |_| {});
    dense.skip_matching_with_hint(Some(false));
    let dense_slots = dense
        .row_positions
        .iter()
        .filter(|slot| **slot != ROW_EMPTY_SLOT)
        .count();

    // Run with the hint set to `Some(true)`.
    let mut sparse = RowMatchGenerator::new(1 << 22);
    sparse.configure(ROW_CONFIG);
    sparse.add_data(data, |_| {});
    sparse.skip_matching_with_hint(Some(true));
    let sparse_slots = sparse
        .row_positions
        .iter()
        .filter(|slot| **slot != ROW_EMPTY_SLOT)
        .count();

    assert!(
        sparse_slots < dense_slots,
        "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
    );
}
8491
/// After `skip_matching_with_hint(None)` on a fresh matcher no row slot may be occupied,
/// while the `Some(false)` control run on the same data must occupy at least one.
#[test]
fn row_skip_matching_with_none_hint_leaves_interior_empty() {
    let data = deterministic_high_entropy_bytes(0x9B47_F2A1_8C5E_3306, 4096);

    // Subject: no hint at all.
    let mut none_hint = RowMatchGenerator::new(1 << 22);
    none_hint.configure(ROW_CONFIG);
    none_hint.add_data(data.clone(), |_| {});
    none_hint.skip_matching_with_hint(None);
    let none_slots = none_hint
        .row_positions
        .iter()
        .filter(|slot| **slot != ROW_EMPTY_SLOT)
        .count();

    // Control: explicit `Some(false)` hint on identical data.
    let mut dense = RowMatchGenerator::new(1 << 22);
    dense.configure(ROW_CONFIG);
    dense.add_data(data, |_| {});
    dense.skip_matching_with_hint(Some(false));
    let dense_slots = dense
        .row_positions
        .iter()
        .filter(|slot| **slot != ROW_EMPTY_SLOT)
        .count();

    assert_eq!(
        none_slots, 0,
        "None hint at block_start=0 must leave row table fully empty \
         (donor parity — interior NOT inserted, no pre-block backfill possible)",
    );
    assert!(
        dense_slots > 0,
        "Some(false) dict-priming path must still insert densely \
         (sanity check: control case for the `none_slots == 0` assertion)",
    );
}
8546
/// Without a source-size hint, the dfast matcher selected by `Level(3)` must keep the
/// default table sizes: `1 << DFAST_HASH_BITS` long-hash entries, and a short-hash table
/// smaller by `DFAST_SHORT_HASH_BITS_DELTA` bits.
///
/// The test name still says "level2", but the body resets to `Level(3)`; the assertion
/// messages name the level that is actually exercised.
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(3));
    let payload = b"abcabcabcabc";
    let mut space = driver.get_next_space();
    space[..payload.len()].copy_from_slice(payload);
    space.truncate(payload.len());
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let long_len = driver.dfast_matcher().long_hash.len();
    let short_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(
        long_len,
        1 << DFAST_HASH_BITS,
        "unhinted Level(3) should keep default long-hash table size"
    );
    assert_eq!(
        short_len,
        1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA),
        "unhinted Level(3) short-hash should be one bit smaller than long-hash"
    );
}
8574
/// Compiled out via `#[cfg(any())]`. Pushes a 1024-capacity suffix store into the pool,
/// commits a 4096-byte block, and checks that the newest window entry has at least 4096
/// suffix slots.
#[cfg(any())]
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Pool entry deliberately smaller than the block committed below.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut block = driver.get_next_space();
    block.clear();
    block.resize(4096, 0xAB);
    driver.commit_space(block);

    let newest_entry = driver
        .simple()
        .window
        .last()
        .expect("window entry must exist after commit");
    let last_suffix_slots = newest_entry.suffixes.slots.len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
8601
/// With a 1024-byte source hint, the driver's window, its `slice_size`, and the length of
/// the next handed-out buffer must all equal `1 << MIN_HINTED_WINDOW_LOG`.
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let hinted_window = driver.window_size() as usize;
    assert_eq!(hinted_window, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(driver.slice_size, hinted_window);

    // The buffer handed out must be exactly one window long.
    let buffer = driver.get_next_space();
    assert_eq!(buffer.len(), hinted_window);
    driver.commit_space(buffer);
}
8616
/// After a large buffer has been committed, a reset with a small source hint must hand
/// out a buffer of the smaller length whose capacity is still at least the earlier one.
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    // First round: full-size buffer.
    let big_buffer = driver.get_next_space();
    let large_capacity = big_buffer.capacity();
    assert!(large_capacity >= 128 * 1024);
    driver.commit_space(big_buffer);

    // Second round: same driver, now with a 1024-byte hint.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let shrunk_buffer = driver.get_next_space();
    assert_eq!(shrunk_buffer.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        shrunk_buffer.capacity() >= large_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
8637
/// Resets through `reset_on_hc_lazy(Best)`, commits a short block, then resets to
/// `Fastest` and checks the reported window (`1 << 19`) and the active backend (`Simple`).
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset_on_hc_lazy(CompressionLevel::Best);
    assert_eq!(driver.window_size(), 1u64 << 22);

    let payload = b"abcabcabcabc";
    let mut block = driver.get_next_space();
    block[..payload.len()].copy_from_slice(payload);
    block.truncate(payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    // Switch down to the fastest level on the same driver.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), 1u64 << 19);
    assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
}
8674
/// Records the HC hash/chain table lengths at Level(13), resets the same driver to
/// Level(15), and requires both tables to be strictly larger afterwards.
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Level(13) pass.
    driver.reset(CompressionLevel::Level(13));
    assert_eq!(driver.window_size(), 1u64 << 22);

    let first_payload = b"abcabcabcabc";
    let mut block = driver.get_next_space();
    block[..first_payload.len()].copy_from_slice(first_payload);
    block.truncate(first_payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let (better_hash_len, better_chain_len) = {
        let hc = driver.hc_matcher();
        (hc.table.hash_table.len(), hc.table.chain_table.len())
    };

    // Level(15) pass.
    driver.reset(CompressionLevel::Level(15));
    assert_eq!(driver.window_size(), 1u64 << 22);

    let second_payload = b"xyzxyzxyzxyz";
    let mut block = driver.get_next_space();
    block[..second_payload.len()].copy_from_slice(second_payload);
    block.truncate(second_payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_matcher();
    let best_hash_len = hc.table.hash_table.len();
    let best_chain_len = hc.table.chain_table.len();
    assert!(
        best_hash_len > better_hash_len,
        "L15 hash_table ({}) should be larger than L13 ({})",
        best_hash_len,
        better_hash_len
    );
    assert!(
        best_chain_len > better_chain_len,
        "L15 chain_table ({}) should be larger than L13 ({})",
        best_chain_len,
        better_chain_len
    );
}
8720
/// Compiled out via `#[cfg(any())]`. Primes the `Fastest` driver with an 8-byte
/// dictionary, commits the same 8 bytes as the first block, and expects a literal-free
/// triple at offset 8.
#[cfg(any())]
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
8755
/// Compiled out via `#[cfg(any())]`. Primes the `Fastest` driver with a 24-byte
/// dictionary, commits the dictionary's first 8 bytes as a block, and expects a
/// literal-free triple at offset 24.
#[cfg(any())]
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
8790
/// Priming with empty dictionary content must still install the supplied offset history
/// on the simple backend.
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // No dictionary bytes, only repeat offsets.
    let empty_content: &[u8] = &[];
    driver.prime_with_dictionary(empty_content, [11, 7, 3]);

    assert_eq!(driver.simple_mut().offset_hist, [11, 7, 3]);
}
8800
/// After priming the HC matcher with empty dictionary content, the offset history must be
/// applied and `should_run_btultra2_seed_pass` must return `false` for an input one byte
/// above `HC_PREDEF_THRESHOLD`.
#[test]
fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);

    driver.prime_with_dictionary(&[], [11, 7, 3]);

    assert_eq!(driver.hc_matcher().table.offset_hist, [11, 7, 3]);

    let seed_pass_enabled = driver
        .hc_matcher()
        .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !seed_pass_enabled,
        "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
    );
}
8816
/// A primed-dictionary snapshot captured with long-distance matching enabled must not be
/// restored once the overrides no longer enable it; a snapshot captured and restored
/// under the same overrides must be accepted.
#[test]
fn primed_snapshot_not_restored_across_ldm_config_change() {
    use super::parameters::CompressionParameters;

    let level = CompressionLevel::Level(19);
    let rep_offsets = [1, 4, 8];
    let dict = b"abcdefghabcdefghabcdefgh";

    let ldm_on = CompressionParameters::builder(CompressionLevel::Level(19))
        .enable_long_distance_matching(true)
        .build()
        .unwrap()
        .overrides();
    let ldm_off = CompressionParameters::builder(CompressionLevel::Level(19))
        .build()
        .unwrap()
        .overrides();

    let mut driver = MatchGeneratorDriver::new(1024, 1);

    // Capture a snapshot while LDM is enabled.
    driver.set_param_overrides(Some(ldm_on));
    driver.reset(level);
    driver.prime_with_dictionary(dict, rep_offsets);
    driver.capture_primed_dictionary(level);

    // Switch to the overrides built without LDM: the snapshot must be rejected.
    driver.set_param_overrides(Some(ldm_off));
    driver.reset(level);
    let restored_across_change = driver.restore_primed_dictionary(level);
    assert!(
        !restored_across_change,
        "primed snapshot restored across an LDM config change (stale producer)",
    );

    // Re-capture under the current overrides: now the restore must succeed.
    driver.prime_with_dictionary(dict, rep_offsets);
    driver.capture_primed_dictionary(level);
    driver.reset(level);
    let restored_same_config = driver.restore_primed_dictionary(level);
    assert!(
        restored_same_config,
        "primed snapshot not restored under identical LDM config",
    );
}
8865
/// After priming the HC matcher with non-empty dictionary content,
/// `should_run_btultra2_seed_pass` must return `false` for an input one byte above
/// `HC_PREDEF_THRESHOLD`.
#[test]
fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let seed_pass_enabled = driver
        .hc_matcher()
        .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !seed_pass_enabled,
        "btultra2 warmup must stay disabled after dictionary priming with content"
    );
}
8880
/// Primes a Level(4) driver with a 16-byte dictionary, commits the same bytes as the
/// first block, and expects a literal-free triple whose offset equals the payload length.
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let payload = b"abcdefghijklmnop";
    driver.prime_with_dictionary(payload, [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(payload);
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty()
            && offset == payload.len()
            && match_len >= DFAST_MIN_MATCH_LEN =>
        {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
8919
/// `window_size()` must report the same value before and after dictionary priming.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let window_before_priming = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let window_after_priming = driver.window_size();

    assert_eq!(
        window_after_priming, window_before_priming,
        "dictionary retention budget must not change reported frame window size"
    );
}
8934
/// A snapshot captured under a 256 KiB source hint must be rejected after a reset with a
/// 48 KiB hint that resolves to a smaller window.
#[test]
fn primed_snapshot_not_restored_when_window_hint_differs() {
    let level = CompressionLevel::Best;
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture under the larger hint.
    driver.set_source_size_hint(256 * 1024);
    driver.reset(level);
    let big_window = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset under the smaller hint.
    driver.set_source_size_hint(48 * 1024);
    driver.reset(level);
    let small_window = driver.window_size();
    assert!(
        small_window < big_window,
        "precondition: the two hints must resolve to different windows \
         (small={small_window}, big={big_window})"
    );

    assert!(
        !driver.restore_primed_dictionary(level),
        "snapshot captured at window {big_window} must NOT be restored into a \
         reset advertising window {small_window} (level alone is an insufficient key)"
    );
}
8973
/// A snapshot captured under a 300 KiB hint must be restored after a reset with a
/// 400 KiB hint, given that both hints resolve to the same window size.
#[test]
fn primed_snapshot_restored_for_hints_in_same_window_bucket() {
    let level = CompressionLevel::Best;
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture under the first hint.
    driver.set_source_size_hint(300 * 1024);
    driver.reset(level);
    let window_a = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset under the second hint.
    driver.set_source_size_hint(400 * 1024);
    driver.reset(level);
    let window_b = driver.window_size();
    assert_eq!(
        window_a, window_b,
        "precondition: same-bucket hints must resolve to the same window \
         (a={window_a}, b={window_b})"
    );

    assert!(
        driver.restore_primed_dictionary(level),
        "snapshot captured at a 300 KiB hint must be restored into a 400 KiB \
         hint that resolves to the identical matcher shape (raw bytes over-key)"
    );
}
9011
/// At Level(22), a snapshot captured under a 20 KiB hint must be restored after a reset
/// with a 100 KiB hint, given that both hints resolve to the same window size.
#[test]
fn primed_snapshot_restored_across_level22_donor_tier_hints() {
    let level = CompressionLevel::Level(22);
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture under the smaller hint.
    driver.set_source_size_hint(20 * 1024);
    driver.reset(level);
    let window_a = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset under the larger hint.
    driver.set_source_size_hint(100 * 1024);
    driver.reset(level);
    let window_b = driver.window_size();
    assert_eq!(
        window_a, window_b,
        "precondition: both hints must land in the same Level 22 donor tier \
         (a={window_a}, b={window_b})"
    );

    assert!(
        driver.restore_primed_dictionary(level),
        "Level 22 snapshot captured at a 20 KiB hint must be restored into a \
         100 KiB hint that resolves to the same donor tier (different ceil-log \
         buckets, identical matcher shape)"
    );
}
9048
/// At Level(1), a snapshot captured under an 8193-byte hint must be rejected after a
/// reset with an 8192-byte hint.
#[test]
fn primed_snapshot_not_restored_across_fast_attach_copy_boundary() {
    let level = CompressionLevel::Level(1);
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture one byte above the boundary.
    driver.set_source_size_hint(8193);
    driver.reset(level);
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset exactly at the boundary and try to restore.
    driver.set_source_size_hint(8192);
    driver.reset(level);
    assert!(
        !driver.restore_primed_dictionary(level),
        "a copy-mode snapshot (8193 B hint) must NOT be restored into an \
         attach-mode reset (8192 B hint) that resolves to the same params but a \
         different dict-table shape"
    );
}
9080
/// At Level(12), a snapshot captured without a hint must be restored after a reset with a
/// 64 MiB hint, given that both resets report the same window size.
#[test]
fn primed_snapshot_fast_attach_does_not_over_key_non_simple_backends() {
    let level = CompressionLevel::Level(12);
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture with no source-size hint set.
    driver.reset(level);
    let window_a = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset with a large hint.
    driver.set_source_size_hint(64 * 1024 * 1024);
    driver.reset(level);
    let window_b = driver.window_size();
    assert_eq!(
        window_a, window_b,
        "precondition: the large hint must resolve to the same window as the \
         unhinted level (a={window_a}, b={window_b})"
    );

    assert!(
        driver.restore_primed_dictionary(level),
        "a Row snapshot must restore across an unhinted vs large-hinted \
         reset that resolves to the identical matcher — `fast_attach` is a Fast \
         backend concept and must not over-key non-Simple shapes"
    );
}
9121
/// Compiled out via `#[cfg(any())]`. After priming with a 9-byte dictionary, every window
/// entry of the simple backend must hold at least `MIN_MATCH_LEN` bytes.
#[cfg(any())]
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let every_entry_long_enough = driver
        .simple()
        .window
        .iter()
        .all(|entry| entry.data.len() >= MIN_MATCH_LEN);
    assert!(
        every_entry_long_enough,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
9141
/// Priming the `Fastest` driver with a 9-byte dictionary must grow the simple backend's
/// `max_window_size` by exactly 8.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let window_before = driver.simple_mut().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let window_after = driver.simple_mut().max_window_size;

    assert_eq!(
        window_after,
        window_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
9157
/// Priming a Level(3) driver with a 12-byte dictionary must grow the dfast matcher's
/// `max_window_size` by exactly 12.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(3));

    let window_before = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
    let window_after = driver.dfast_matcher().max_window_size;

    assert_eq!(
        window_after,
        window_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
9174
/// Primes a Level(5) driver with a 16-byte dictionary, commits the same bytes as the
/// first block, and expects a literal-free triple whose offset equals the payload length.
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));

    let payload = b"abcdefghijklmnop";
    driver.prime_with_dictionary(payload, [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(payload);
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty()
            && offset == payload.len()
            && match_len >= ROW_MIN_MATCH_LEN =>
        {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
9213
/// Priming a Level(5) driver with a 9-byte dictionary must grow the row matcher's
/// `max_window_size` by exactly 8.
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));

    let window_before = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let window_after = driver.row_matcher().max_window_size;

    assert_eq!(
        window_after,
        window_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
9230
/// With the row window forced to 8 bytes, priming with a 24-byte dictionary raises
/// `max_window_size` by 24; after two further 8-byte blocks the retained dictionary budget
/// must be 0 and `max_window_size` must be back at its starting value.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    // Force an 8-byte window on both the matcher and the reported size.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let window_before = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.row_matcher().max_window_size, window_before + 24);

    // Commit two more blocks after the dictionary.
    for chunk in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut block = driver.get_next_space();
        block.clear();
        block.extend_from_slice(chunk);
        driver.commit_space(block);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        window_before,
        "retired dictionary budget must not remain reusable for live history"
    );
}
9261
/// On the Row backend `get_last_space` must return the bytes just committed, and a
/// following reset to `Fastest` must switch the active backend to `Simple`.
#[test]
fn row_get_last_space_then_reset_to_fastest_drops_row_variant() {
    use super::strategy::BackendTag;

    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    assert_eq!(driver.active_backend(), BackendTag::Row);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"row-data");
    driver.commit_space(block);

    assert_eq!(driver.get_last_space(), b"row-data");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend(), BackendTag::Simple);
}
9287
/// Committing a block on the Row backend must grow `vec_pool`, and `get_last_space` must
/// still return the committed bytes afterwards.
#[test]
fn driver_row_commit_recycles_block_buffer_into_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);

    // Pool size is sampled before a buffer is taken out.
    let before_pool = driver.vec_pool.len();
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(block);

    assert!(
        driver.vec_pool.len() > before_pool,
        "row commit must recycle the committed block buffer into vec_pool \
         (before_pool = {before_pool}, after = {})",
        driver.vec_pool.len()
    );
    assert_eq!(driver.get_last_space(), b"row-data-to-recycle");
}
9321
/// A source size of 0 must reduce a `window_log` of 22 to `MIN_HINTED_WINDOW_LOG`.
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    let mut level_params = resolve_level_params(CompressionLevel::Level(4), None);
    // Start from a large window so the adjustment has something to shrink.
    level_params.window_log = 22;

    let shrunk = adjust_params_for_source_size(level_params, 0);

    assert_eq!(shrunk.window_log, MIN_HINTED_WINDOW_LOG);
}
9329
/// Cross-checks `common_prefix_len` against a byte-by-byte oracle over a grid of buffer
/// lengths, slice start offsets and mismatch positions, plus one unequal-length case.
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    /// Oracle: index of the first differing byte, or the shorter length when none differ.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        a.iter()
            .zip(b)
            .position(|(lhs, rhs)| lhs != rhs)
            .unwrap_or_else(|| a.len().min(b.len()))
    }

    const TOTAL_LENS: [usize; 15] = [
        0, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ];
    const STARTS: [usize; 3] = [0, 1, 3];
    const MISMATCHES: [usize; 14] = [0, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191];

    for total_len in TOTAL_LENS {
        let base: Vec<u8> = (0..total_len)
            .map(|i| ((i * 13 + 7) % 256) as u8)
            .collect();

        // Start offsets beyond the buffer are skipped.
        for start in STARTS.into_iter().filter(|&s| s <= total_len) {
            let view = &base[start..];
            let twin = view.to_vec();
            assert_eq!(
                common_prefix_len(view, &twin),
                scalar_reference(view, &twin),
                "equal slices total_len={total_len} start={start}"
            );

            let len = view.len();
            // Flip one byte at each listed position that lies inside the slice.
            for mismatch in MISMATCHES.into_iter().filter(|&m| m < len) {
                let mut altered = twin.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    common_prefix_len(view, &altered),
                    scalar_reference(view, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Flip the very last byte of a non-empty slice.
            if let Some(mismatch) = len.checked_sub(1) {
                let mut altered = twin.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    common_prefix_len(view, &altered),
                    scalar_reference(view, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Inputs of different lengths with identical content.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    assert_eq!(
        common_prefix_len(&long, &shorter),
        scalar_reference(&long, &shorter)
    );
}
9392
/// On 64 identical bytes, `pick_lazy_match` must return `None` for a minimum-length
/// candidate at position 16.
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let probe_pos = matcher.history_abs_start + 16;
    let current = MatchCandidate {
        start: probe_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };

    let decision = matcher.pick_lazy_match(probe_pos, 0, Some(current));
    assert!(
        decision.is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
9411
/// With `lazy_depth = 2`, `pick_lazy_match` must return `None` when the candidate two
/// positions ahead is more than one byte longer than the current one.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // 40 filler bytes with a crafted 19-byte pattern at positions 11..30.
    let mut input = alloc::vec![b'x'; 40];
    input[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(input, |_| {});
    matcher.ensure_tables();

    let probe_pos = matcher.history_abs_start + 20;
    let current = matcher
        .best_match(probe_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(current.offset, 9);
    assert_eq!(current.match_len, 6);

    // A +1 candidate, if any, must not be longer than the current match.
    if let Some(one_ahead) = matcher.best_match(probe_pos + 1, 1) {
        assert!(one_ahead.match_len <= current.match_len);
    }

    let two_ahead = matcher
        .best_match(probe_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        two_ahead.match_len > current.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );

    let decision = matcher.pick_lazy_match(probe_pos, 0, Some(current));
    assert!(
        decision.is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
9450
/// With `lazy_depth = 2`, `pick_lazy_match` must keep the current candidate when the one
/// two positions ahead is exactly one byte longer.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // 40 filler bytes with a crafted 19-byte pattern at positions 11..30.
    let mut input = alloc::vec![b'x'; 40];
    input[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(input, |_| {});
    matcher.ensure_tables();

    let probe_pos = matcher.history_abs_start + 20;
    let current = matcher
        .best_match(probe_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(current.offset, 9);
    assert_eq!(current.match_len, 6);

    let two_ahead = matcher
        .best_match(probe_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(two_ahead.match_len, current.match_len + 1);

    let kept = matcher
        .pick_lazy_match(probe_pos, 0, Some(current))
        .expect("lazy picker should keep current best");
    assert_eq!(kept.start, current.start);
    assert_eq!(kept.offset, current.offset);
    assert_eq!(kept.match_len, current.match_len);
}
9484
/// Recomputes the row index and tag for one position by hand and checks that
/// `hash_and_row` returns the same pair.
///
/// The expected value masks the key to `min(mls, 6)` bytes, mixes it with the matcher's
/// hash kernel, keeps the top `row_hash_log + ROW_TAG_BITS` bits, and splits them into a
/// row index (upper part) and a tag (low byte).
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut row_matcher = RowMatchGenerator::new(1 << 22);
    row_matcher.configure(ROW_CONFIG);
    let payload = alloc::vec![
        0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
        0x70, 0x80,
    ];
    row_matcher.add_data(payload, |_| {});
    row_matcher.ensure_tables();

    let abs = row_matcher.history_abs_start + 8;
    let (actual_row, actual_tag) = row_matcher
        .hash_and_row(abs)
        .expect("row hash should be available");

    // Independent recomputation of the expected (row, tag) pair.
    let rel = abs - row_matcher.history_abs_start;
    let history = row_matcher.live_history();
    let key_bytes = row_matcher.mls.min(6);
    let key_mask = (1u64 << (key_bytes * 8)) - 1;
    let raw = u64::from_le_bytes(history[rel..rel + 8].try_into().unwrap());
    let mixed = crate::encoding::fastpath::hash_mix_u64_with_kernel(
        row_matcher.hash_kernel,
        raw & key_mask,
    );
    let kept_bits = row_matcher.row_hash_log + ROW_TAG_BITS;
    let top = mixed >> (u64::BITS as usize - kept_bits);
    let row_mask = (1usize << row_matcher.row_hash_log) - 1;
    let want_row = ((top >> ROW_TAG_BITS) as usize) & row_mask;
    let want_tag = top as u8;

    assert_eq!(actual_row, want_row);
    assert_eq!(actual_tag, want_tag);
}
9522
/// A repcode whose source lies before `history_abs_start` must not be proposed.
///
/// The live history starts at absolute position 10 and the only non-zero rep offset (3)
/// reaches back from position 12 to position 9, which is outside that history.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.offset_hist = [3, 0, 0];
    row.history_abs_start = 10;
    row.history_start = 0;
    row.history = alloc::vec![b'a'; 20];

    let candidate = row.repcode_candidate(12, 1);
    assert!(candidate.is_none());
}
9534
/// Querying a repcode at the last byte of a 5-byte history (position 4) must yield no
/// candidate.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.offset_hist = [1, 0, 0];
    row.history_abs_start = 0;
    row.history_start = 0;
    row.history = b"abcde".to_vec();

    let candidate = row.repcode_candidate(4, 1);
    assert!(candidate.is_none());
}
9546
/// On x86_64 hosts that report SSE4.2, the dispatched hash mix must equal the result of
/// calling the `sse42` implementation directly, whichever kernel `select_kernel` picks.
///
/// The test is a no-op on hosts without SSE4.2.
#[cfg(all(feature = "std", target_arch = "x86_64"))]
#[test]
fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
    use crate::encoding::fastpath::{self, FastpathKernel};
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }
    let input = 0x0123_4567_89AB_CDEFu64;
    // SAFETY: reached only after the runtime SSE4.2 check above; presumably that is the
    // only precondition of `sse42::hash_mix_u64` — confirm against its definition.
    let reference = unsafe { fastpath::sse42::hash_mix_u64(input) };
    let via_dispatch = fastpath::dispatch_hash_mix_u64(input);
    match fastpath::select_kernel() {
        FastpathKernel::Sse42 => assert_eq!(via_dispatch, reference),
        _ => assert_eq!(via_dispatch, reference, "AVX2/SSE4.2 share CRC32 mix"),
    }
}
9567
/// On little-endian aarch64 hosts that report the `crc` feature, the dispatched hash mix
/// must equal the result of calling the `neon` implementation directly.
///
/// The test is a no-op on hosts without the `crc` feature.
#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
#[test]
fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
    use crate::encoding::fastpath;
    if !is_aarch64_feature_detected!("crc") {
        return;
    }
    let input = 0x0123_4567_89AB_CDEFu64;
    // SAFETY: reached only after the runtime `crc` check above; presumably that is the
    // only precondition of `neon::hash_mix_u64` — confirm against its definition.
    let reference = unsafe { fastpath::neon::hash_mix_u64(input) };
    let via_dispatch = fastpath::dispatch_hash_mix_u64(input);
    assert_eq!(via_dispatch, reference);
}
9581
/// `MatchTable::hash3_position` must equal the hand-written formula: shift the
/// little-endian 32-bit read left by 8, multiply by `HC_PRIME3BYTES` (wrapping) and keep
/// the top `HC3_HASH_LOG` bits.
#[test]
fn hc_hash3_position_matches_donor_formula() {
    let sample = *b"abcd";
    let word = u32::from_le_bytes(sample);
    let product = (word << 8).wrapping_mul(HC_PRIME3BYTES);
    let want = (product >> (32 - HC3_HASH_LOG)) as usize;

    let got = super::match_table::storage::MatchTable::hash3_position(&sample, HC3_HASH_LOG);
    assert_eq!(got, want);
}
9592
/// For a Lazy-strategy HC table, `hash_position` must equal the 4-byte formula: multiply
/// the little-endian 32-bit read by `HC_PRIME4BYTES` (wrapping) and keep the top
/// `hash_log` bits.
#[test]
fn hc_hash_position_matches_donor_hash4_formula() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);

    let sample = *b"abcd";
    let word = u32::from_le_bytes(sample);
    let product = word.wrapping_mul(HC_PRIME4BYTES);
    let want = (product >> (32 - generator.table.hash_log)) as usize;

    assert_eq!(generator.table.hash_position(&sample), want);
}
9602
/// With the BtUltra2 configuration, hashing at `BtMatcher::HASH_MLS` must match the
/// 4-byte formula (first four bytes, `HC_PRIME4BYTES`, top `hash_log` bits) even though
/// eight input bytes are supplied.
#[test]
fn btultra2_main_hash_uses_donor_hash4_formula() {
    use super::match_table::storage::MatchTable;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(
        BTULTRA2_HC_CONFIG_L22,
        super::strategy::StrategyTag::BtUltra2,
        27,
    );

    let sample = *b"abcdefgh";
    let word = u32::from_le_bytes(sample[..4].try_into().unwrap());
    let product = word.wrapping_mul(HC_PRIME4BYTES);
    let want = (product >> (32 - generator.table.hash_log)) as usize;

    let got = MatchTable::hash_position_with_mls(
        &sample,
        generator.table.hash_log,
        super::bt::BtMatcher::HASH_MLS,
    );
    assert_eq!(got, want);
}
9621
/// A history one byte shorter than `ROW_MIN_MATCH_LEN` cannot produce a row candidate at
/// position 0.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.history_abs_start = 0;
    row.history_start = 0;
    row.history = alloc::vec![b'a'; ROW_MIN_MATCH_LEN - 1];

    let candidate = row.row_candidate(0, 0);
    assert!(candidate.is_none());
}
9636
/// With only three bytes of history, every slot returned by `chain_candidates` must be
/// the `usize::MAX` sentinel.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut generator = HcMatchGenerator::new(32);
    generator.table.history_abs_start = 0;
    generator.table.history_start = 0;
    generator.table.history = b"abc".to_vec();
    generator.table.ensure_tables();

    let slots = generator.hc.chain_candidates(&generator.table, 0);
    let only_sentinels = slots.iter().all(|&slot| slot == usize::MAX);
    assert!(only_sentinels);
}
9648
/// After `reset`, `history_abs_start` must equal the previous frame's absolute end, and
/// every hash-table slot that still resolves to a position must resolve below that new
/// floor.
#[test]
fn hc_reset_advances_floor_past_prior_frame_entries() {
    use super::match_table::storage::MatchTable;

    let mut generator = HcMatchGenerator::new(32);
    generator.table.add_data(b"abcdeabcde".to_vec(), |_| {});
    generator.table.ensure_tables();
    generator.table.insert_positions(0, 6);

    // Precondition: the first frame ended at 10 and left entries in the hash table.
    let previous_end = generator.table.history_abs_end();
    assert_eq!(previous_end, 10);
    assert!(generator.table.hash_table.iter().any(|&v| v != HC_EMPTY));

    generator.reset(|_| {});

    assert_eq!(generator.table.history_abs_start, previous_end);

    let floor = generator.table.history_abs_start;
    let resolved = generator.table.hash_table.iter().filter_map(|&slot| {
        MatchTable::stored_abs_position_fast(
            slot,
            generator.table.position_base,
            generator.table.index_shift,
        )
    });
    for candidate_abs in resolved {
        assert!(
            candidate_abs < floor,
            "a prior-frame entry must resolve below the advanced floor"
        );
    }
}
9680
/// When `history_abs_start` already sits at `REBASE_RESET_FLOOR_CEILING`, `reset` must
/// return the table to origin: both `history_abs_start` and `position_base` are 0 and the
/// hash and chain tables contain only `HC_EMPTY`.
#[test]
fn hc_reset_full_zeroes_when_floor_would_cross_ceiling() {
    use super::match_table::storage::REBASE_RESET_FLOOR_CEILING;

    let mut generator = HcMatchGenerator::new(32);
    generator.table.add_data(b"abcdeabcde".to_vec(), |_| {});
    generator.table.ensure_tables();
    // Poison both tables so a skipped clear would be visible.
    generator.table.hash_table.fill(123);
    generator.table.chain_table.fill(456);
    generator.table.history_abs_start = REBASE_RESET_FLOOR_CEILING;

    generator.reset(|_| {});

    let table = &generator.table;
    assert_eq!(table.history_abs_start, 0);
    assert_eq!(table.position_base, 0);
    assert!(table.hash_table.iter().all(|&v| v == HC_EMPTY));
    assert!(table.chain_table.iter().all(|&v| v == HC_EMPTY));
}
9702
/// `start_matching` on an empty block must never invoke the sequence callback.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut generator = HcMatchGenerator::new(32);
    generator.table.add_data(Vec::new(), |_| {});

    let mut callback_ran = false;
    generator.start_matching(|_| {
        callback_ran = true;
    });

    assert!(!callback_ran, "empty current block should not emit sequences");
}
9711
/// Produces `len` pseudo-random bytes from `seed` with a 64-bit xorshift-style step
/// (shifts 13, 7, 17), emitting bits 40..48 of the state after each step.
///
/// The output depends only on the arguments, so fixtures built from it are reproducible.
/// A `seed` of 0 keeps the state at 0 and therefore yields only zero bytes.
#[cfg(test)]
fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
    let mut x = seed;
    (0..len)
        .map(|_| {
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            (x >> 40) as u8
        })
        .collect()
}
9724
/// Splits `data` into `(start, len)` block ranges by repeatedly asking
/// `optimal_block_size` (level 22) how long the next block should be.
///
/// Each block is capped at the bytes actually available (at most `HC_BLOCKSIZE_MAX`) and
/// is at least one byte long, so the walk always terminates. The `savings` argument is 0
/// until the cursor has reached `HC_BLOCKSIZE_MAX`, and 3 afterwards.
#[cfg(test)]
fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
    let total = data.len();
    let mut ranges = Vec::new();
    let mut start = 0usize;
    let mut savings = 0i64;

    while start < total {
        let remaining = total - start;
        let window = remaining.min(HC_BLOCKSIZE_MAX);
        let proposed = crate::encoding::frame_compressor::optimal_block_size(
            CompressionLevel::Level(22),
            &data[start..start + window],
            remaining,
            HC_BLOCKSIZE_MAX,
            savings,
        );
        let block_len = proposed.min(window).max(1);

        ranges.push((start, block_len));
        start += block_len;

        if start >= HC_BLOCKSIZE_MAX {
            savings = 3;
        }
    }

    ranges
}
9753
/// Folds block-delimiter entries into the sequence that follows them.
///
/// An entry with `offset == 0 && match_len == 0` is treated as a delimiter carrying only
/// literals. Its literal count is accumulated (saturating) and added to the literal
/// length of the next real sequence. Literals left over after the last real sequence are
/// emitted as one trailing `(literals, 0, 0)` entry; nothing is emitted when that count
/// is zero.
#[cfg(test)]
fn merge_block_delimiters_like_donor(
    sequences: Vec<(usize, usize, usize)>,
) -> Vec<(usize, usize, usize)> {
    let mut merged = Vec::with_capacity(sequences.len());
    let mut carried = 0usize;

    for (lit_len, offset, match_len) in sequences {
        match (offset, match_len) {
            (0, 0) => carried = carried.saturating_add(lit_len),
            _ => {
                merged.push((lit_len.saturating_add(carried), offset, match_len));
                carried = 0;
            }
        }
    }

    if carried != 0 {
        merged.push((carried, 0, 0));
    }
    merged
}
9773
/// Runs the Rust level-22 matcher over `data`, merges block delimiters into the following
/// sequence, and drops any remaining literal-only `(_, 0, 0)` entry.
#[cfg(test)]
fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let raw = collect_level22_sequences_with_delimiters(data);
    let mut merged = merge_block_delimiters_like_donor(raw);
    merged.retain(|&(_, offset, match_len)| offset != 0 || match_len != 0);
    merged
}
9781
/// Feeds `data` through a level-22 `MatchGeneratorDriver`, one block per range from
/// `level22_donor_block_ranges`, and records every emitted sequence as
/// `(literal_len, offset, match_len)`.
///
/// Literal-only sequences are recorded with `offset == 0` and `match_len == 0`.
#[cfg(test)]
fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Level(22));

    let mut collected = Vec::new();
    for (start, len) in level22_donor_block_ranges(data) {
        let block = &data[start..start + len];

        // Copy the block into the driver's buffer and trim the buffer to the block length.
        let mut buffer = driver.get_next_space();
        buffer[..block.len()].copy_from_slice(block);
        buffer.truncate(block.len());
        driver.commit_space(buffer);

        driver.start_matching(|seq| {
            collected.push(match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            });
        });
    }
    collected
}
9809
/// Obtains the reference (C zstd) level-22 sequences for `data`, merges block delimiters
/// into the following sequence, and drops any remaining literal-only `(_, 0, 0)` entry.
#[cfg(test)]
fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let raw = donor_level22_sequences_with_delimiters(data);
    let mut merged = merge_block_delimiters_like_donor(raw);
    merged.retain(|&(_, offset, match_len)| offset != 0 || match_len != 0);
    merged
}
9817
/// Asks the C zstd library (via `zstd_sys`) for the level-22 sequences of `data` and
/// returns them as `(literal_len, offset, match_len)` triples, block delimiters included.
///
/// # Panics
///
/// Panics if the compression context cannot be created, if setting the level or
/// generating sequences reports a zstd error, or if freeing the context returns non-zero.
#[cfg(test)]
fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
    use zstd::zstd_safe;
    use zstd::zstd_safe::zstd_sys;

    /// Panics with the zstd error name when `code` is a zstd error code.
    fn assert_zstd_ok(code: usize, context: &str) {
        // SAFETY: `ZSTD_isError` receives a plain integer and no pointers.
        let is_error = unsafe { zstd_sys::ZSTD_isError(code) };
        assert_eq!(
            is_error,
            0,
            "{context} failed: {}",
            zstd_safe::get_error_name(code)
        );
    }

    // SAFETY: takes no arguments; the returned pointer is null-checked before use.
    let cctx = unsafe { zstd_sys::ZSTD_createCCtx() };
    assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");

    // SAFETY: `cctx` is the non-null context created above and has not been freed.
    let level_rc = unsafe {
        zstd_sys::ZSTD_CCtx_setParameter(
            cctx,
            zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
            22,
        )
    };
    assert_zstd_ok(level_rc, "ZSTD_c_compressionLevel");

    // Output buffer sized by zstd's own bound for this input length.
    let blank = zstd_sys::ZSTD_Sequence {
        offset: 0,
        litLength: 0,
        matchLength: 0,
        rep: 0,
    };
    let mut seqs = alloc::vec![blank; zstd_safe::sequence_bound(data.len())];

    // SAFETY: `cctx` is live; the output pointer/length pair comes from `seqs` and the
    // input pointer/length pair comes from `data`, both of which outlive the call.
    let seq_count = unsafe {
        zstd_sys::ZSTD_generateSequences(
            cctx,
            seqs.as_mut_ptr(),
            seqs.len(),
            data.as_ptr().cast(),
            data.len(),
        )
    };
    assert_zstd_ok(seq_count, "ZSTD_generateSequences");

    // SAFETY: `cctx` is freed exactly once and not used afterwards.
    let rc = unsafe { zstd_sys::ZSTD_freeCCtx(cctx) };
    assert_eq!(rc, 0, "ZSTD_freeCCtx failed");

    seqs.truncate(seq_count);
    seqs.into_iter()
        .map(|seq| {
            (
                seq.litLength as usize,
                seq.offset as usize,
                seq.matchLength as usize,
            )
        })
        .collect()
}
9879
/// Sequence-level parity with the reference encoder on corpus file `z000033`.
#[test]
fn level22_sequences_match_donor_on_corpus_proxy() {
    assert_level22_sequences_match_donor(include_bytes!("../../decodecorpus_files/z000033"));
}
9885
/// Sequence-level parity with the reference encoder on corpus file `z000030`.
#[test]
fn level22_sequences_match_donor_on_small_corpus_proxy() {
    assert_level22_sequences_match_donor(include_bytes!("../../decodecorpus_files/z000030"));
}
9891
/// Asserts that the Rust and reference level-22 sequence streams for `data` are equal.
///
/// On divergence the panic message reports the first differing index, both entries, both
/// stream lengths, the input position reached by each side, the repeat-offset history
/// replayed over the Rust prefix, an up-to-eight-entry window around the divergence from
/// each stream, and the block range(s) containing the Rust position.
#[cfg(test)]
fn assert_level22_sequences_match_donor(data: &[u8]) {
    let rust = collect_level22_sequences(data);
    let donor = donor_level22_sequences(data);
    if rust == donor {
        return;
    }

    // A pure length mismatch diverges at the end of the shorter stream.
    let shared_len = rust.len().min(donor.len());
    let first_diff = rust
        .iter()
        .zip(donor.iter())
        .position(|(lhs, rhs)| lhs != rhs)
        .unwrap_or(shared_len);

    // Input bytes covered (literals + match) by the agreeing prefix of each stream.
    let covered = |seqs: &[(usize, usize, usize)]| -> usize {
        seqs.iter()
            .take(first_diff)
            .map(|&(lit_len, _, match_len)| lit_len + match_len)
            .sum()
    };
    let rust_pos = covered(&rust);
    let donor_pos = covered(&donor);

    let window_start = first_diff.saturating_sub(4);
    let rust_window = &rust[window_start..rust.len().min(first_diff + 4)];
    let donor_window = &donor[window_start..donor.len().min(first_diff + 4)];

    // Replay the prefix to recover the repeat-offset history at the divergence point.
    let mut reps = [1u32, 4, 8];
    for &(lit_len, offset, _) in rust.iter().take(first_diff) {
        let _ = encode_offset_with_history(offset as u32, lit_len as u32, &mut reps);
    }

    let containing_blocks = level22_donor_block_ranges(data)
        .into_iter()
        .filter(|(start, len)| *start <= rust_pos && rust_pos < start + len)
        .collect::<Vec<_>>();

    panic!(
        "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
        first_diff,
        rust.get(first_diff),
        donor.get(first_diff),
        rust.len(),
        donor.len(),
        rust_pos,
        donor_pos,
        reps,
        rust_window,
        donor_window,
        containing_blocks,
    );
}
9937
/// After a sparse `skip_matching(Some(true))` over a high-entropy block, a second block
/// that begins with the first block's 9-byte tail must open with a zero-literal match at
/// offset `tail.len()` that is at least `tail.len()` bytes long.
#[test]
fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";

    // High-entropy first block whose final bytes are overwritten with the marker tail.
    let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    // `first` is not needed again, so it is moved rather than cloned.
    matcher.table.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.table.add_data(second, |_| {});

    // Record only the first emitted sequence as (literal_len, offset, match_len).
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse skip");
    assert_eq!(
        literals_len, 0,
        "first sequence should start at block boundary"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "tail-aligned cross-block match must be preserved"
    );
}
9983
/// BtUltra2 variant of the sparse-skip tail test: after `skip_matching(Some(true))`, a
/// second block that begins with the first block's 9-byte tail must open with a
/// zero-literal match at offset `tail.len()` that is at least `tail.len()` bytes long.
#[test]
fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut bt = HcMatchGenerator::new(1 << 20);
    bt.configure(
        BTULTRA2_HC_CONFIG_L22,
        super::strategy::StrategyTag::BtUltra2,
        20,
    );

    // High-entropy first block whose final bytes are overwritten with the marker tail.
    let tail = b"Bt9kLm2Rp";
    let mut block_one = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
    let splice_at = block_one.len() - tail.len();
    block_one[splice_at..].copy_from_slice(tail);
    bt.table.add_data(block_one, |_| {});
    bt.skip_matching(Some(true));

    let mut block_two = tail.to_vec();
    block_two.extend_from_slice(b"after-tail-literals");
    bt.table.add_data(block_two, |_| {});

    // Record only the first emitted sequence as (literal_len, offset, match_len).
    let mut first_sequence: Option<(usize, usize, usize)> = None;
    bt.start_matching(|seq| {
        if first_sequence.is_none() {
            first_sequence = Some(match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            });
        }
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse BT skip");
    assert_eq!(
        literals_len, 0,
        "BT sparse skip should preserve an immediate boundary match"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first BT match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "BT sparse skip must seed the dense tail for cross-block matching"
    );
}
10034
/// After a sparse skip, a position that lies both on the sparse insertion grid
/// (multiples of `INCOMPRESSIBLE_SKIP_STEP` from the block start) and inside the dense
/// tail must not carry a self-referencing chain entry (`chain_table[idx] == rel + 1`).
#[test]
fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
    // Read the length up front so the buffer can be moved into the table without a clone.
    let current_len = first.len();
    matcher.table.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    // Absolute span of the block that was just skipped.
    let current_abs_start =
        matcher.table.history_abs_start + matcher.table.window_size - current_len;
    let current_abs_end = current_abs_start + current_len;

    // Start of the dense tail, clamped so it stays inside the live history and the block.
    let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
    let tail_start = current_abs_end
        .saturating_sub(dense_tail)
        .max(matcher.table.history_abs_start)
        .max(current_abs_start);

    let overlap_pos = (tail_start..current_abs_end)
        .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
        .expect("fixture should contain at least one sparse-grid overlap in dense tail");

    let rel = matcher
        .table
        .relative_position(overlap_pos)
        .expect("overlap position should be representable as relative position");
    let chain_idx = rel as usize & ((1 << matcher.table.chain_log) - 1);
    assert_ne!(
        matcher.table.chain_table[chain_idx],
        rel + 1,
        "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
    );
}
10067
/// With 16 of 26 history bytes already consumed, `compact_history` must drop the consumed
/// prefix and reset `history_start` to 0.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut generator = HcMatchGenerator::new(8);
    generator.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    generator.table.history_start = 16;

    generator.table.compact_history();

    let table = &generator.table;
    assert_eq!(table.history_start, 0);
    assert_eq!(table.history, b"qrstuvwxyz");
}
10077
/// With `position_base` (1) above the inserted absolute position (0),
/// `insert_position_no_rebase` must leave both the hash and chain tables untouched.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut generator = HcMatchGenerator::new(32);
    generator.table.history = b"abcdefghijklmnop".to_vec();
    generator.table.history_abs_start = 0;
    generator.table.position_base = 1;
    generator.table.ensure_tables();

    // Snapshot both tables so any write is detected.
    let hash_snapshot = generator.table.hash_table.clone();
    let chain_snapshot = generator.table.chain_table.clone();

    generator.table.insert_position_no_rebase(0);

    assert_eq!(generator.table.hash_table, hash_snapshot);
    assert_eq!(generator.table.chain_table, chain_snapshot);
}
10093
/// A contiguous `insert_positions(0, 9)` must move `next_to_update3` from 0 to 9.
#[test]
fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
    let mut generator = HcMatchGenerator::new(64);
    {
        let table = &mut generator.table;
        table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
        table.history_start = 0;
        table.history_abs_start = 0;
        table.position_base = 0;
        table.ensure_tables();
        table.next_to_update3 = 0;
    }

    generator.table.insert_positions(0, 9);

    assert_eq!(
        generator.table.next_to_update3, 9,
        "contiguous insert_positions should advance hash3 update cursor"
    );
}
10111
/// A strided `insert_positions_with_step(0, 16, 4)` must leave `next_to_update3` at 0.
#[test]
fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
    let mut generator = HcMatchGenerator::new(64);
    {
        let table = &mut generator.table;
        table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
        table.history_start = 0;
        table.history_abs_start = 0;
        table.position_base = 0;
        table.ensure_tables();
        table.next_to_update3 = 0;
    }

    generator.table.insert_positions_with_step(0, 16, 4);

    assert_eq!(
        generator.table.next_to_update3, 0,
        "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
    );
}
10129
/// Simple-backend variant of the dictionary-budget eviction test.
///
/// Currently compiled out: `#[cfg(any())]` is never satisfied.
#[cfg(any())]
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.simple_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    // Priming with a 24-byte dictionary must grow the window by 24.
    let window_before_prime = driver.simple_mut().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.simple_mut().max_window_size, window_before_prime + 24);

    // Commit two 8-byte blocks without matching.
    for payload in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buffer = driver.get_next_space();
        buffer.clear();
        buffer.extend_from_slice(payload);
        driver.commit_space(buffer);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.simple_mut().max_window_size,
        window_before_prime,
        "retired dictionary budget must not remain reusable for live history"
    );
}
10163
/// On the dfast backend (level 3) with an 8-byte window, priming with a 24-byte
/// dictionary grows the window by 24; after two 8-byte blocks are committed the retained
/// dictionary budget must be 0 and the window back at its pre-prime size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(3));
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let window_before_prime = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        window_before_prime + 24
    );

    // Commit two 8-byte blocks without matching.
    for payload in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buffer = driver.get_next_space();
        buffer.clear();
        buffer.extend_from_slice(payload);
        driver.commit_space(buffer);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        window_before_prime,
        "retired dictionary budget must not remain reusable for live history"
    );
}
10195
/// After priming the HC-lazy backend with an 8-byte dictionary, a first block holding the
/// same 8 bytes must produce a zero-literal match at offset 8 of at least
/// `HC_MIN_MATCH_LEN` bytes.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The block repeats the dictionary bytes exactly.
    let mut buffer = driver.get_next_space();
    buffer.clear();
    buffer.extend_from_slice(b"abcdefgh");
    driver.commit_space(buffer);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= HC_MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
10232
/// On the HC-lazy backend with an 8-byte window, priming with a 24-byte dictionary grows
/// the window by 24; after two 8-byte blocks are committed the retained dictionary budget
/// must be 0 and the window back at its pre-prime size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);
    driver.hc_matcher_mut().table.max_window_size = 8;
    driver.reported_window_size = 8;

    let window_before_prime = driver.hc_matcher().table.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(
        driver.hc_matcher().table.max_window_size,
        window_before_prime + 24
    );

    // Commit two 8-byte blocks without matching.
    for payload in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buffer = driver.get_next_space();
        buffer.clear();
        buffer.extend_from_slice(payload);
        driver.commit_space(buffer);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().table.max_window_size,
        window_before_prime,
        "retired dictionary budget must not remain reusable for live history"
    );
}
10263
/// With a 1 MiB window on the HC-lazy backend, committing one 8-byte block after priming
/// must leave `dictionary_retained_budget` exactly as it was after priming.
#[test]
fn hc_commit_without_eviction_retires_no_dictionary_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);
    driver.hc_matcher_mut().table.max_window_size = 1 << 20;
    driver.reported_window_size = 1 << 20;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let primed_budget = driver.dictionary_retained_budget;
    assert!(
        primed_budget > 0,
        "priming must retain a non-zero dictionary budget"
    );

    let mut buffer = driver.get_next_space();
    buffer.clear();
    buffer.extend_from_slice(b"AAAAAAAA");
    driver.commit_space(buffer);
    driver.skip_matching_with_hint(None);

    assert_eq!(
        driver.dictionary_retained_budget, primed_budget,
        "a commit that evicts nothing must retire no dictionary budget"
    );
}
10296
/// With a 1 MiB window on the Row backend (level 5), committing one 8-byte block after
/// priming must leave `dictionary_retained_budget` exactly as it was after priming.
#[test]
fn row_commit_without_eviction_retires_no_dictionary_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    // Guard: this test is only meaningful if level 5 selects the Row storage.
    assert!(matches!(driver.storage, MatcherStorage::Row(_)));
    driver.row_matcher_mut().max_window_size = 1 << 20;
    driver.reported_window_size = 1 << 20;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let primed_budget = driver.dictionary_retained_budget;
    assert!(
        primed_budget > 0,
        "priming must retain a non-zero dictionary budget"
    );

    let mut buffer = driver.get_next_space();
    buffer.clear();
    buffer.extend_from_slice(b"AAAAAAAA");
    driver.commit_space(buffer);
    driver.skip_matching_with_hint(None);

    assert_eq!(
        driver.dictionary_retained_budget, primed_budget,
        "a Row commit that evicts nothing must retire no dictionary budget"
    );
}
10331
/// Places the HC history start at `u32::MAX + 64` and checks that `skip_matching`
/// re-anchors `position_base` to `history_abs_start`, leaves the hash table populated,
/// and that `chain_candidates` still returns at least one non-sentinel slot.
///
/// Returns early on targets where `u32::MAX + 64` does not fit in `usize`.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut hc = HcMatchGenerator::new(64);
    hc.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    hc.table.ensure_tables();
    hc.table.position_base = 0;

    let Ok(beyond_u32) = usize::try_from(u64::from(u32::MAX) + 64) else {
        return;
    };
    hc.table.history_abs_start = beyond_u32;

    hc.skip_matching(None);
    assert_eq!(
        hc.table.position_base, hc.table.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    let hash_populated = hc.table.hash_table.iter().any(|entry| *entry != HC_EMPTY);
    assert!(
        hash_populated,
        "HC hash table should still be populated after crossing u32 boundary"
    );

    let probe = hc.table.history_abs_start + 10;
    let slots = hc.hc.chain_candidates(&hc.table, probe);
    assert!(
        slots.iter().any(|slot| *slot != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
10368
/// Moves the Row matcher's `history_abs_start` to 16 below `u32::MAX` and checks that the
/// next `add_data` lowers it again, so that `history_abs_start + window_size` stays below
/// `u32::MAX`.
///
/// Built only for 64-bit targets.
#[cfg(target_pointer_width = "64")]
#[test]
fn row_rebases_positions_after_u32_boundary() {
    let mut row = RowMatchGenerator::new(64);
    row.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});

    let near_ceiling = (u32::MAX as usize) - 16;
    row.history_abs_start = near_ceiling;

    row.add_data(b"fghij".to_vec(), |_| {});

    let origin_after = row.history_abs_start;
    assert!(
        origin_after < near_ceiling,
        "add_data must rebase the absolute origin down when the cursor nears u32::MAX (got {})",
        row.history_abs_start
    );
    assert!(
        (row.history_abs_start + row.window_size) < u32::MAX as usize,
        "after rebase the live window must fit below the u32 position ceiling"
    );
}
10406
/// `maybe_rebase_positions(abs_pos)` must yield exactly the tables obtained by clearing
/// them and re-inserting only the positions in `history_abs_start..abs_pos`.
///
/// Returns early on targets where `u32::MAX + 64` does not fit in `usize`.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let Ok(beyond_u32) = usize::try_from(u64::from(u32::MAX) + 64) else {
        return;
    };

    // Matcher under test: stale base at 0 with the history moved past the u32 range.
    let mut actual = HcMatchGenerator::new(64);
    actual.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    actual.table.ensure_tables();
    actual.table.position_base = 0;
    actual.table.history_abs_start = beyond_u32;
    let abs_pos = actual.table.history_abs_start + 6;

    // Reference: already anchored at the history start, tables rebuilt by hand.
    let mut reference = HcMatchGenerator::new(64);
    reference.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    reference.table.ensure_tables();
    reference.table.history_abs_start = beyond_u32;
    reference.table.position_base = reference.table.history_abs_start;
    reference.table.hash_table.fill(HC_EMPTY);
    reference.table.chain_table.fill(HC_EMPTY);
    let prefix = reference.table.history_abs_start..abs_pos;
    for pos in prefix {
        reference.table.insert_position_no_rebase(pos);
    }

    actual.table.maybe_rebase_positions(abs_pos);

    assert_eq!(
        actual.table.position_base, actual.table.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        actual.table.hash_table, reference.table.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        actual.table.chain_table, reference.table.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
10446
/// A `SuffixStore` with capacity 1 must accept an insert and return the stored value.
///
/// Currently compiled out: `#[cfg(any())]` is never satisfied.
#[cfg(any())]
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    let key = b"abcde";
    let mut store = SuffixStore::with_capacity(1);
    store.insert(key, 0);

    assert!(store.contains_key(key));
    assert_eq!(store.get(key), Some(0));
}
10455
/// Checks `hash_fill_step` after resetting to `Uncompressed` (1) and `Fastest`
/// (`FAST_HASH_FILL_STEP`), then checks that `Better` selects the hash-chain backend with
/// a `1 << 23` window and lazy depth 2.
///
/// Currently compiled out: `#[cfg(any())]` is never satisfied.
#[cfg(any())]
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Uncompressed);
    let uncompressed_step = driver.simple().hash_fill_step;
    assert_eq!(uncompressed_step, 1);

    driver.reset(CompressionLevel::Fastest);
    let fastest_step = driver.simple().hash_fill_step;
    assert_eq!(fastest_step, FAST_HASH_FILL_STEP);

    driver.reset(CompressionLevel::Better);
    assert_eq!(
        driver.active_backend(),
        super::strategy::BackendTag::HashChain
    );
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().hc.lazy_depth, 2);
}
10478
/// The first sequence for `"abcde"` repeated three times must be five literals followed
/// by a 10-byte match at offset 5, after which `offset_hist` must be `[5, 1, 4]`.
///
/// Currently compiled out: `#[cfg(any())]` is never satisfied.
#[cfg(any())]
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut simple = MatchGenerator::new(64);
    simple.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    let want = Sequence::Triple {
        literals: b"abcde",
        offset: 5,
        match_len: 10,
    };
    let produced = simple.next_sequence(|seq| {
        assert_eq!(seq, want);
    });
    assert!(produced);
    assert_eq!(simple.offset_hist, [5, 1, 4]);
}
10501
/// With zero pending literals and `offset_hist = [99, 10, 4]`, the repcode probe at index
/// 10 must return `(10, 10)`.
///
/// Currently compiled out: `#[cfg(any())]` is never satisfied.
#[cfg(any())]
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut simple = MatchGenerator::new(64);
    simple.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher at the start of the second copy with no pending literals.
    simple.suffix_idx = 10;
    simple.last_idx_in_sequence = 10;
    simple.offset_hist = [99, 10, 4];

    let found = simple.repcode_candidate(&simple.window.last().unwrap().data[10..], 0);
    assert_eq!(found, Some((10, 10)));
}
10519
#[cfg(any())]
#[test]
/// Adds the same 10-byte block twice (skipping matching on the first) and
/// checks that `repcode_candidate` on the second block, with
/// `offset_hist = [99, 10, 4]`, returns `Some((10, 10))` — a match that can
/// only start in the earlier window entry.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);

    let older = b"abcdefghij".to_vec();
    matcher.add_data(older, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();

    let newer = b"abcdefghij".to_vec();
    matcher.add_data(newer, SuffixStore::with_capacity(64), |_, _| {});

    matcher.offset_hist = [99, 10, 4];

    let current_block = &matcher.window.last().unwrap().data;
    let found = matcher.repcode_candidate(current_block, 0);
    assert_eq!(found, Some((10, 10)));
}
10541
#[cfg(any())]
#[test]
/// Same fixture as the rep1 variant, but the matching offset (10) sits in the
/// third history slot: `offset_hist = [99, 4, 10]`. The candidate must still
/// be `Some((10, 10))`.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    let input = b"abcdefghijabcdefghij".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 4, 10];

    let second_half = &matcher.window.last().unwrap().data[10..];
    let found = matcher.repcode_candidate(second_half, 0);
    assert_eq!(found, Some((10, 10)));
}
10559
#[cfg(any())]
#[test]
/// With `offset_hist = [11, 4, 99]` no stored offset equals 10, yet the
/// candidate must be `Some((10, 10))` — i.e. the first entry minus one
/// (11 - 1) has to be tried as well.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    let input = b"abcdefghijabcdefghij".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [11, 4, 99];

    let newest = matcher.window.last().unwrap();
    let found = matcher.repcode_candidate(&newest.data[10..], 0);
    assert_eq!(found, Some((10, 10)));
}
10577
#[cfg(any())]
#[test]
/// Two distinct 10-byte blocks are loaded and the cursor is set to byte 3 of
/// the second one, so only 13 bytes precede it. Asking for the match length
/// at offset 14 must therefore yield `None`.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);

    let older = b"abcdefghij".to_vec();
    matcher.add_data(older, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();

    let newer = b"klmnopqrst".to_vec();
    matcher.add_data(newer, SuffixStore::with_capacity(64), |_, _| {});
    matcher.suffix_idx = 3;

    let from_cursor = &matcher.window.last().unwrap().data[3..];
    let length = matcher.offset_match_len(14, from_cursor);
    assert_eq!(length, None);
}
10598
#[cfg(any())]
#[test]
/// Even with `hash_fill_step` set to `FAST_HASH_FILL_STEP`, a skipped block
/// must leave position 1 (`"bcdef"`) findable: the follow-up 5-byte block has
/// to be emitted as one literal-free triple at offset 15, and nothing after.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;

    let history = b"abcdefghijklmnop".to_vec();
    matcher.add_data(history, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();

    let probe = b"bcdef".to_vec();
    matcher.add_data(probe, SuffixStore::with_capacity(64), |_, _| {});

    let emitted = matcher.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"",
            offset: 15,
            match_len: 5,
        };
        assert_eq!(seq, expected);
    });
    assert!(emitted);

    // The probe is fully consumed by that single match.
    let has_more = matcher.next_sequence(|_| {});
    assert!(!has_more);
}
10624
#[cfg(any())]
#[test]
/// Runs the same 32-byte history through `skip_matching_with_hint(Some(true))`
/// twice and probes it with two different `MIN_MATCH_LEN`-byte follow-ups:
///
/// * bytes starting at index 3 — the first emitted sequence must be literals;
/// * the final `MIN_MATCH_LEN` bytes — the first emitted sequence must be a
///   triple with no literals.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let mut matcher = MatchGenerator::new(128);
    let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();

    // Both probes are cut from the history before it is handed to the matcher.
    let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
    let tail_start = first.len() - MIN_MATCH_LEN;
    let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();

    matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});

    let mut sparse_first_is_literals = None;
    let sparse_emitted = matcher.next_sequence(|seq| {
        // Only the first callback is recorded.
        sparse_first_is_literals.get_or_insert(matches!(seq, Sequence::Literals { .. }));
    });
    assert!(sparse_emitted);
    assert!(
        sparse_first_is_literals == Some(true),
        "sparse-start probe should not produce an immediate match"
    );

    // Fresh matcher, identical history, this time probed with the tail bytes.
    let mut matcher = MatchGenerator::new(128);
    let history = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
    matcher.add_data(history, SuffixStore::with_capacity(256), |_, _| {});
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});

    let mut tail_first_is_immediate_match = None;
    let tail_emitted = matcher.next_sequence(|seq| {
        tail_first_is_immediate_match.get_or_insert(matches!(
            seq,
            Sequence::Triple { literals, .. } if literals.is_empty()
        ));
    });
    assert!(tail_emitted);
    assert!(
        tail_first_is_immediate_match == Some(true),
        "dense tail probe should match immediately at block start"
    );
}
10671
#[cfg(any())]
#[test]
/// After `add_suffixes_till(10, FAST_HASH_FILL_STEP)` on a 10-byte block, the
/// suffix store must map the last 5 bytes (`data[5..10]`) to index 5.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;

    let block = b"01234abcde".to_vec();
    matcher.add_data(block, SuffixStore::with_capacity(64), |_, _| {});
    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    let anchor_key = &entry.data[5..10];
    let registered = entry.suffixes.get(anchor_key);
    assert_eq!(registered, Some(5));
}
10688
#[cfg(any())]
#[test]
/// Calling `add_suffixes_till` with an index of `MIN_MATCH_LEN - 1` must not
/// register the block's first `MIN_MATCH_LEN`-byte key.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;

    let block = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.add_data(block, SuffixStore::with_capacity(1 << 16), |_, _| {});

    let too_short = MIN_MATCH_LEN - 1;
    matcher.add_suffixes_till(too_short, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    let leading_key = &entry.data[..MIN_MATCH_LEN];
    let registered = entry.suffixes.get(leading_key);
    assert_eq!(registered, None);
}
10706
#[cfg(any())]
#[test]
/// After `add_suffixes_till(17, FAST_HASH_FILL_STEP)` the suffix store must
/// hold positions 0, 3, 6, 9 and 12, each keyed by the `MIN_MATCH_LEN` bytes
/// starting there.
///
/// Compiled out: `#[cfg(any())]` is never satisfied.
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;

    let block = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.add_data(block, SuffixStore::with_capacity(1 << 16), |_, _| {});

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    let expected_positions = [0usize, 3, 6, 9, 12];
    for pos in expected_positions {
        let key_end = pos + MIN_MATCH_LEN;
        let registered = entry.suffixes.get(&entry.data[pos..key_end]);
        assert_eq!(
            registered,
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
10730
#[test]
/// Pushes three 6-byte blocks through a `DfastMatchGenerator::new(16)`,
/// skipping matching on the first two, then replays the sequences emitted for
/// the third block on top of a copy of the second block. The replay must
/// reproduce the second block followed by the third.
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Decoder state pre-seeded with the second block's bytes.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| {
        // Split every sequence into its literal run and an optional back-reference.
        let (literals, back_reference) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };

        decoded.extend_from_slice(literals);
        if let Some((offset, match_len)) = back_reference {
            // Byte-by-byte so that a match overlapping its own output still works.
            let copy_from = decoded.len() - offset;
            for src in copy_from..copy_from + match_len {
                let byte = decoded[src];
                decoded.push(byte);
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
10760
#[test]
/// Adds two 8-byte blocks, each backed by an allocation requested with
/// capacity 64, to a `DfastMatchGenerator::new(8)`. The callback passed to the
/// second `add_data` must observe a buffer whose `len()` is 8.
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    /// Copies `payload` into a vector whose requested capacity (64) exceeds
    /// the payload length used by this test.
    fn oversized_block(payload: &[u8]) -> Vec<u8> {
        let mut block = Vec::with_capacity(64);
        block.extend_from_slice(payload);
        block
    }

    let mut matcher = DfastMatchGenerator::new(8);
    matcher.add_data(oversized_block(b"abcdefgh"), |_| {});

    let mut observed_evicted_len = None;
    matcher.add_data(oversized_block(b"ijklmnop"), |evicted| {
        observed_evicted_len = Some(evicted.len());
    });

    assert_eq!(
        observed_evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
10783
#[test]
/// Drives a Dfast-backed `MatchGeneratorDriver` (level 3) whose matcher has
/// `max_window_size` forced to 10 and whose `dictionary_retained_budget` is
/// forced to 100, then commits 4 + 4 + 5 bytes. The budget must stay at 100
/// after the first two commits and read 87 after the third, with the
/// matcher's `max_window_size` ending at 0.
fn dfast_commit_space_eviction_uses_window_size_delta() {
    use crate::encoding::CompressionLevel;

    /// Copies `payload` into a vector requested with capacity 64, so the
    /// buffer's capacity differs from its length.
    fn oversized_space(payload: &[u8]) -> Vec<u8> {
        let mut space = Vec::with_capacity(64);
        space.extend_from_slice(payload);
        space
    }

    let mut driver = MatchGeneratorDriver::new(10, 1);
    driver.reset(CompressionLevel::Level(3));
    assert!(matches!(driver.storage, MatcherStorage::Dfast(_)));

    driver.dfast_matcher_mut().max_window_size = 10;
    driver.dictionary_retained_budget = 100;

    driver.commit_space(oversized_space(b"abcd"));
    assert_eq!(
        driver.dictionary_retained_budget, 100,
        "1st commit fills window 0 → 4, no eviction, no retire"
    );

    driver.commit_space(oversized_space(b"efgh"));
    assert_eq!(
        driver.dictionary_retained_budget, 100,
        "2nd commit fills window 4 → 8, no eviction, no retire"
    );

    driver.commit_space(oversized_space(b"ijklm"));
    assert_eq!(
        driver.dictionary_retained_budget, 87,
        "3rd commit + trim_after_budget_retire cascade. With the fix \
         (evicted=4 from window_size delta) the cascade reclaims 100 \
         → 96 → 92 → 87. With the bug (evicted=5 from data.len()) the \
         3rd commit would panic on `data.len() <= max_window_size` \
         after the 2nd commit's cascade had already shrunk \
         max_window_size to 0."
    );

    let final_window = driver.dfast_matcher_mut().max_window_size;
    assert_eq!(
        final_window,
        0,
        "cascade drains max_window_size to 0 once budget reclaim \
         exceeds the initial window size"
    );
}
10897
#[test]
/// Fills a 16-byte window with two 8-byte blocks (each in a buffer requested
/// with capacity 64), lowers `max_window_size` to 8 and calls
/// `trim_to_window`. Exactly one block must be dropped: `window_size` 8,
/// one entry in `window_blocks`, `history_abs_start` advanced to 8.
fn dfast_trim_to_window_evicts_oldest_block_by_length() {
    let mut matcher = DfastMatchGenerator::new(16);

    for payload in [b"abcdefgh", b"ijklmnop"] {
        // Capacity is deliberately larger than the 8 payload bytes.
        let mut block = Vec::with_capacity(64);
        block.extend_from_slice(payload);
        matcher.add_data(block, |_| {});
    }

    assert_eq!(matcher.window_size, 16);
    assert_eq!(matcher.window_blocks.len(), 2);

    matcher.max_window_size = 8;
    matcher.trim_to_window();

    assert_eq!(
        matcher.window_size, 8,
        "exactly one 8-byte block must remain"
    );
    let remaining_blocks = matcher.window_blocks.len();
    assert_eq!(remaining_blocks, 1);
    assert_eq!(matcher.history_abs_start, 8);
}
10939
#[test]
/// Matches `"012345bcdea"` (expected to be all literals) and then
/// `"bcdeabcdeab"`. The second block must come out as exactly one sequence:
/// a literal-free triple with offset 5 and match length 11, i.e. a match that
/// starts in the last bytes of the first block.
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| {
        if let Sequence::Literals { literals } = seq {
            history.extend_from_slice(literals);
        } else {
            unreachable!("first block should not match history");
        }
    });
    assert_eq!(history, b"012345bcdea");

    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        // A second callback means the block was split into several sequences.
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;

        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            assert_eq!(literals, b"");
            assert_eq!(offset, 5);
            assert_eq!(match_len, 11);

            // Replay the match byte-by-byte; it overlaps its own output.
            let copy_from = history.len() - offset;
            for src in copy_from..copy_from + match_len {
                let byte = history[src];
                history.push(byte);
            }
        } else {
            panic!("expected tail-anchored cross-block match before any literals")
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}
10984
#[test]
/// Hash-chain counterpart of the dfast tail test. Block 1 (`"PQRSTBCD"`) must
/// be emitted as literals only; block 2 (`"BCDBCDBCDB"`) must open with a
/// literal-free triple whose offset is 3 and whose length is at least
/// `HC_MIN_MATCH_LEN`. Only the first sequence of block 2 is inspected.
fn hashchain_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    matcher.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);

    matcher.table.add_data(b"PQRSTBCD".to_vec(), |_| {});
    let mut history = alloc::vec::Vec::new();
    matcher.start_matching(|seq| {
        if let Sequence::Literals { literals } = seq {
            history.extend_from_slice(literals);
        } else {
            unreachable!("first block has no internal repeats");
        }
    });
    assert_eq!(history, b"PQRSTBCD");

    matcher.table.add_data(b"BCDBCDBCDB".to_vec(), |_| {});
    // (offset, match_len) of the first sequence emitted for block 2.
    let mut first_boundary_match: Option<(usize, usize)> = None;
    matcher.start_matching(|seq| {
        if first_boundary_match.is_none() {
            match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => {
                    assert_eq!(literals, b"", "no leading literals on the boundary match");
                    first_boundary_match = Some((offset, match_len));
                }
                Sequence::Literals { .. } => {
                    panic!(
                        "expected tail-anchored cross-block match before any literals — \
                         backfill_boundary_positions did not seed positions 5/6/7"
                    )
                }
            }
        }
    });

    let (offset, match_len) = first_boundary_match.expect(
        "expected tail-anchored cross-block match emitted from backfill_boundary_positions",
    );
    assert!(
        (1..=3).contains(&offset),
        "boundary match offset {offset} must point into the unhashable tail \
         (positions 5/6/7 of an 8-byte block 1) so the test specifically \
         locks down backfill_boundary_positions",
    );
    assert_eq!(
        offset, 3,
        "candidate position must land at 5 (= block_1_len - 3) so the 4-byte \
         window `data[5..9] = b\"BCDB\"` matches block 2's first hash lookup",
    );
    assert!(
        match_len >= HC_MIN_MATCH_LEN,
        "match_len {match_len} must clear the HC min-match floor",
    );
}
11070
#[test]
/// A block ending in the 9-byte marker `"Qz9kLm2Rp"` is skipped with
/// `skip_matching(Some(false))`; the next block starts with the same marker.
/// The first sequence of that next block must be a literal-free match at
/// offset `tail.len()` of at least `DFAST_MIN_MATCH_LEN` bytes.
fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
    let tail = b"Qz9kLm2Rp";
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    let mut first = b"0123456789abcdef".to_vec();
    first.extend_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(false));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // (literal count, offset, match length) of the first emitted sequence;
    // a pure-literal sequence is recorded with offset and length 0.
    let mut first_sequence: Option<(usize, usize, usize)> = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_none() {
            first_sequence = Some(match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            });
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected dense skip to preserve cross-boundary tail match"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
11114
#[test]
/// Same check as the dense variant, but the first block is 4096 bytes from
/// `deterministic_high_entropy_bytes` with the marker `"Qz9kLm2Rp"` written
/// over its last 9 bytes, and it is skipped with `skip_matching(Some(true))`.
/// The following block must still open with a literal-free match at offset
/// `tail.len()`.
fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
    let tail = b"Qz9kLm2Rp";
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
    let marker_at = first.len() - tail.len();
    first[marker_at..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});

    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Only the first callback contributes; later sequences are ignored.
    let mut first_sequence: Option<(usize, usize, usize)> = None;
    matcher.start_matching(|seq| {
        first_sequence.get_or_insert_with(|| match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected match against densely seeded tail"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
11160
#[test]
/// Dense-skips a 4096-byte block, then dense-skips a single extra byte. The
/// position 7 bytes before the end of the first block now has 8 bytes
/// available (checked by the fixture assertion), and its `long_hash` slot
/// must hold the packed form of that position.
fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
    let first_len = first.len();
    matcher.add_data(first, |_| {});
    matcher.skip_matching_dense();

    // One more byte extends the history past the first block's end.
    matcher.add_data(alloc::vec![0xAB], |_| {});
    matcher.skip_matching_dense();

    let target_abs_pos = first_len - 7;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 8 <= live.len(),
        "fixture must make the boundary start long-hashable"
    );

    let bucket = matcher.long_hash_index(&live[target_rel..]);
    let expected_slot = matcher.pack_slot(target_abs_pos);
    assert_ne!(
        expected_slot, DFAST_EMPTY_SLOT,
        "pack_slot must never return the empty-slot sentinel for a real position"
    );
    assert_eq!(
        matcher.long_hash[bucket], expected_slot,
        "dense skip must seed long-hash entry for newly hashable boundary start"
    );
}
11194
#[test]
/// Calls `seed_remaining_hashable_starts` on a 64-byte block with a start of
/// `current_len - DFAST_MIN_MATCH_LEN` and checks that the `short_hash` slot
/// for the position 5 bytes before the block end holds that position's
/// packed value.
fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
    matcher.add_data(block, |_| {});
    matcher.ensure_hash_tables();

    // Length and absolute start of the most recently added block.
    let block_len = matcher.window_blocks.back().map_or(0, |&len| len);
    let block_abs_start = matcher.history_abs_start + matcher.window_size - block_len;
    matcher.seed_remaining_hashable_starts(
        block_abs_start,
        block_len,
        block_len - DFAST_MIN_MATCH_LEN,
    );

    let target_abs_pos = block_abs_start + block_len - 5;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 5 <= live.len(),
        "fixture must leave the last short-hash start valid"
    );

    let bucket = matcher.short_hash_index(&live[target_rel..]);
    let expected_slot = matcher.pack_slot(target_abs_pos);
    assert_ne!(
        expected_slot, DFAST_EMPTY_SLOT,
        "pack_slot must never return the empty-slot sentinel for a real position"
    );
    assert_eq!(
        matcher.short_hash[bucket], expected_slot,
        "tail seeding must include the last 5-byte-hashable start"
    );
}
11225
#[test]
/// Variant of the short-hash tail test in which the third argument to
/// `seed_remaining_hashable_starts` is the block length itself. The position
/// 5 bytes before the block end must still be present in `short_hash`.
fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
    matcher.add_data(block, |_| {});
    matcher.ensure_hash_tables();

    let block_len = matcher.window_blocks.back().map_or(0, |&len| len);
    let block_abs_start = matcher.history_abs_start + matcher.window_size - block_len;
    // Start value equal to the block length: the cursor is already at the end.
    matcher.seed_remaining_hashable_starts(block_abs_start, block_len, block_len);

    let target_abs_pos = block_abs_start + block_len - 5;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 5 <= live.len(),
        "fixture must leave the last short-hash start valid"
    );

    let bucket = matcher.short_hash_index(&live[target_rel..]);
    let expected_slot = matcher.pack_slot(target_abs_pos);
    assert_ne!(
        expected_slot, DFAST_EMPTY_SLOT,
        "pack_slot must never return the empty-slot sentinel for a real position"
    );
    assert_eq!(
        matcher.short_hash[bucket], expected_slot,
        "tail seeding must still include the last 5-byte-hashable start when pos is at block end"
    );
}
11255
#[test]
/// Seeds slot 0 of both hash tables with the packed value of absolute
/// position 1024, then calls `ensure_room_for` with
/// `u32::MAX - DFAST_REBASE_GUARD_BAND + 1`. Afterwards:
///
/// * `position_base` must equal `DFAST_REBASE_GUARD_BAND`;
/// * both seeded slots must read `DFAST_EMPTY_SLOT`;
/// * packing the trigger position and undoing it as
///   `position_base + packed - 1` must give the trigger position back.
fn dfast_ensure_room_for_rebases_above_guard_band() {
    let guard_band = DFAST_REBASE_GUARD_BAND as usize;

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    matcher.set_hash_bits(10, 10);
    matcher.ensure_hash_tables();

    let early_abs = 1024usize;
    let early_packed = matcher.pack_slot(early_abs);
    assert_ne!(early_packed, DFAST_EMPTY_SLOT);
    matcher.short_hash[0] = early_packed;
    matcher.long_hash[0] = early_packed;

    let trigger_abs = (u32::MAX as usize) - guard_band + 1;
    assert_eq!(matcher.position_base, 0);
    matcher.ensure_room_for(trigger_abs);
    assert_eq!(
        matcher.position_base, guard_band,
        "rebase must advance position_base by DFAST_REBASE_GUARD_BAND"
    );

    assert_eq!(
        matcher.short_hash[0], DFAST_EMPTY_SLOT,
        "pre-rebase short-hash entries below the reducer must become empty"
    );
    assert_eq!(
        matcher.long_hash[0], DFAST_EMPTY_SLOT,
        "pre-rebase long-hash entries below the reducer must become empty"
    );

    let post_packed = matcher.pack_slot(trigger_abs);
    assert_ne!(post_packed, DFAST_EMPTY_SLOT);
    let round_tripped = matcher.position_base + (post_packed as usize) - 1;
    assert_eq!(
        round_tripped, trigger_abs,
        "post-rebase pack/unpack must round-trip the absolute position"
    );
}
11328
#[test]
/// Two 4096-byte blocks are skipped back-to-back with
/// `skip_matching(Some(true))`. An 11-byte marker straddles their boundary:
/// its first 3 bytes end block one, its last 8 bytes start block two. A third
/// block beginning with the whole marker must open with a literal-free match
/// whose offset reaches back to the start of the marker
/// (`second.len() + boundary_prefix.len()`).
fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
    let boundary_prefix = [0xFA, 0xFB, 0xFC];
    let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // Block one: marker prefix written over the last bytes.
    let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    let prefix_at = first.len() - boundary_prefix.len();
    first[prefix_at..].copy_from_slice(&boundary_prefix);
    matcher.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    // Block two: marker suffix written over the first bytes.
    let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
    matcher.add_data(second.clone(), |_| {});
    matcher.skip_matching(Some(true));

    // Block three: the complete marker followed by unrelated bytes.
    let mut third = boundary_prefix.to_vec();
    third.extend_from_slice(&boundary_suffix);
    third.extend_from_slice(b"-trailing-literals");
    matcher.add_data(third, |_| {});

    let mut first_sequence: Option<(usize, usize, usize)> = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_none() {
            let summary = match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            };
            first_sequence = Some(summary);
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate match from the prior sparse-skip boundary"
    );
    assert_eq!(
        offset,
        second.len() + boundary_prefix.len(),
        "expected match against backfilled first→second boundary start"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
11381
#[test]
/// Round-trip check for one fixed input: 2047 pseudo-random bytes (seed 23)
/// followed by two copies of bytes 128..256. The input is committed to a
/// `Fastest` driver with a source-size hint, and the emitted sequences are
/// replayed into `rebuilt`, which must equal the input. Every triple's offset
/// is checked to be non-zero and within the bytes produced so far.
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    /// Produces `len` bytes from a 64-bit linear congruential generator
    /// seeded with `seed`, taking bits 33.. of each state as the output byte.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    let i = 23u64;
    let len = (i * 89 % 16384) as usize;
    let mut data = generate_data(i, len);
    let repeat = data[128..256].to_vec();
    for _ in 0..2 {
        data.extend_from_slice(&repeat);
    }

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);

    let input_len = data.len();
    let mut space = driver.get_next_space();
    space[..input_len].copy_from_slice(&data);
    space.truncate(input_len);
    driver.commit_space(space);

    let mut rebuilt = Vec::with_capacity(input_len);
    let mut saw_triple = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            saw_triple = true;
            rebuilt.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            // Byte-by-byte copy so overlapping matches replay correctly.
            let copy_from = rebuilt.len() - offset;
            for src in copy_from..copy_from + match_len {
                let byte = rebuilt[src];
                rebuilt.push(byte);
            }
        }
        Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
    });

    // Whether a match was emitted is recorded but deliberately not asserted.
    let _ = saw_triple;
    assert_eq!(rebuilt, data);
}
11451
#[test]
/// Pins the fast-strategy parameters returned by `resolve_level_params`:
///
/// * `Level(1)`: hash_log 14, mls 7, step_size 2;
/// * `Level(-7)..=Level(-1)`: hash_log 13, mls 7, step_size `-n + 1`;
/// * `Fastest`: window_log 19 with (14, 6, 2);
/// * `Uncompressed`: window_log 17 with (14, 6, 2).
fn fast_levels_dispatch_per_level_hash_log_and_mls() {
    let level_one = resolve_level_params(CompressionLevel::Level(1), None)
        .fast
        .unwrap();
    assert_eq!(level_one.hash_log, 14);
    assert_eq!(level_one.mls, 7);
    assert_eq!(level_one.step_size, 2);

    for n in -7..=-1 {
        let negative = resolve_level_params(CompressionLevel::Level(n), None)
            .fast
            .unwrap();
        assert_eq!(negative.hash_log, 13, "Level({n}) fast_hash_log");
        assert_eq!(negative.mls, 7, "Level({n}) fast_mls");
        let expected_step = ((-n) as usize) + 1;
        assert_eq!(
            negative.step_size, expected_step,
            "Level({n}) fast_step_size"
        );
    }

    // Named presets: (window_log, hash_log, mls, step_size).
    let presets = [
        (CompressionLevel::Fastest, (19, 14, 6, 2)),
        (CompressionLevel::Uncompressed, (17, 14, 6, 2)),
    ];
    for (level, expected) in presets {
        let params = resolve_level_params(level, None);
        let fast = params.fast.unwrap();
        assert_eq!(
            (params.window_log, fast.hash_log, fast.mls, fast.step_size),
            expected,
        );
    }
}
11497
#[test]
/// For every level expected to use the Fast strategy, resets a shared driver
/// to `Default` and then to that level, and checks that the simple matcher's
/// `hash_log()`, `mls()` and `step_size()` equal the values
/// `resolve_level_params` reports for the level.
fn fast_levels_driver_wiring_threads_cparams_into_inner_matcher() {
    let fast_levels = [
        CompressionLevel::Level(1),
        CompressionLevel::Fastest,
        CompressionLevel::Uncompressed,
        CompressionLevel::Level(-1),
        CompressionLevel::Level(-2),
        CompressionLevel::Level(-3),
        CompressionLevel::Level(-4),
        CompressionLevel::Level(-5),
        CompressionLevel::Level(-6),
        CompressionLevel::Level(-7),
    ];

    let mut driver = MatchGeneratorDriver::new(64 * 1024, 1);

    for level in fast_levels.iter().copied() {
        let resolved = resolve_level_params(level, None);
        assert_eq!(
            resolved.strategy_tag,
            super::strategy::StrategyTag::Fast,
            "{level:?} must resolve to Fast strategy",
        );

        // Reset to `Default` first, so the reset under test starts from a
        // different level's state.
        crate::encoding::Matcher::reset(&mut driver, CompressionLevel::Default);
        crate::encoding::Matcher::reset(&mut driver, level);

        let expected = resolved.fast.unwrap();
        let inner = driver.simple_mut();
        assert_eq!(
            inner.hash_log(),
            expected.hash_log,
            "{level:?}: inner matcher hash_log mismatch — argument swap?",
        );
        assert_eq!(
            inner.mls(),
            expected.mls,
            "{level:?}: inner matcher mls mismatch — argument swap?",
        );
        assert_eq!(
            inner.step_size(),
            expected.step_size,
            "{level:?}: inner matcher step_size mismatch — stale value carried from prior reset?",
        );
    }
}
11567
#[test]
/// For levels 5 through 15, compares the `target_len` of the resolved
/// parameters (taken from the `hc` config if present, otherwise from `row`)
/// with `targetLength` as reported by `ZSTD_getCParams(level, 0, 0)`.
fn lazy_band_target_len_matches_donor_default_table() {
    use zstd::zstd_safe::zstd_sys;

    for level in 5..=15i32 {
        // SAFETY: the call passes only integer arguments and the result is
        // used by value. NOTE(review): presumably a pure table lookup with
        // no pointer arguments — confirm against the zstd headers.
        let reference = unsafe { zstd_sys::ZSTD_getCParams(level, 0, 0) };

        let params = resolve_level_params(CompressionLevel::Level(level), None);
        let configured = match params.hc {
            Some(hc) => Some(hc.target_len),
            None => params.row.map(|row| row.target_len),
        };
        let target_len = configured.expect("lazy/greedy level carries hc or row config");

        assert_eq!(
            target_len as u32, reference.targetLength,
            "L{level}: target_len ({target_len}) must match reference cParams.targetLength ({})",
            reference.targetLength
        );
    }
}
11603
#[test]
/// For levels 13 through 15, checks the resolved hash-chain parameters against
/// `ZSTD_getCParams(level, 0, 0)`: `search_depth == 1 << searchLog`, and
/// `window_log`, `hash_log`, `chain_log` equal to their reference
/// counterparts.
fn upper_lazy_band_params_match_donor_default_table() {
    use zstd::zstd_safe::zstd_sys;

    for level in 13..=15i32 {
        // SAFETY: the call passes only integer arguments and the result is
        // used by value. NOTE(review): presumably a pure table lookup with
        // no pointer arguments — confirm against the zstd headers.
        let reference = unsafe { zstd_sys::ZSTD_getCParams(level, 0, 0) };

        let params = resolve_level_params(CompressionLevel::Level(level), None);
        let hc = params.hc.unwrap();

        let reference_depth = 1u32 << reference.searchLog;
        assert_eq!(
            hc.search_depth as u32,
            reference_depth,
            "L{level}: hc.search_depth ({}) must equal 1<<cParams.searchLog ({})",
            hc.search_depth,
            1u32 << reference.searchLog
        );
        assert_eq!(
            params.window_log as u32, reference.windowLog,
            "L{level}: window_log ({}) must equal cParams.windowLog ({})",
            params.window_log, reference.windowLog
        );
        assert_eq!(
            hc.hash_log as u32, reference.hashLog,
            "L{level}: hc.hash_log ({}) must equal cParams.hashLog ({})",
            hc.hash_log, reference.hashLog
        );
        assert_eq!(
            hc.chain_log as u32, reference.chainLog,
            "L{level}: hc.chain_log ({}) must equal cParams.chainLog ({})",
            hc.chain_log, reference.chainLog
        );
    }
}