1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
11use core::arch::aarch64::{uint8x16_t, vceqq_u8, vgetq_lane_u64, vld1q_u8, vreinterpretq_u64_u8};
12#[cfg(target_arch = "x86")]
13use core::arch::x86::{
14 __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
15 _mm256_loadu_si256, _mm256_movemask_epi8,
16};
17#[cfg(target_arch = "x86_64")]
18use core::arch::x86_64::{
19 __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
20 _mm256_loadu_si256, _mm256_movemask_epi8,
21};
22use core::convert::TryInto;
23use core::num::NonZeroUsize;
24
25use super::BETTER_WINDOW_LOG;
26use super::CompressionLevel;
27use super::Matcher;
28use super::Sequence;
29use super::blocks::encode_offset_with_history;
30use super::incompressible::{block_looks_incompressible, block_looks_incompressible_strict};
31#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
32use std::arch::is_aarch64_feature_detected;
33#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
34use std::arch::is_x86_feature_detected;
35#[cfg(feature = "std")]
36use std::sync::OnceLock;
37
/// Minimum match length accepted by the `Simple` suffix-store matcher
/// (also the number of bytes hashed by `SuffixStore::key`).
const MIN_MATCH_LEN: usize = 5;
/// Hash back-fill stride used at the fastest level (level-1 table entry).
const FAST_HASH_FILL_STEP: usize = 3;
/// Skip stride applied when a block looks incompressible (simple matcher).
const INCOMPRESSIBLE_SKIP_STEP: usize = 8;
/// Minimum match length for the double-fast ("dfast") matcher.
const DFAST_MIN_MATCH_LEN: usize = 6;
/// Lookahead distance for the dfast short-hash probe.
const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
/// Minimum match length for the row-based matcher.
const ROW_MIN_MATCH_LEN: usize = 6;
/// Dfast "good enough" match length — tuning cutoff for search effort.
const DFAST_TARGET_LEN: usize = 48;
/// Default log2 size of the dfast hash table (may be shrunk per window).
const DFAST_HASH_BITS: usize = 20;
/// Number of candidates examined per dfast search.
const DFAST_SEARCH_DEPTH: usize = 4;
/// Sentinel marking an unused dfast table slot.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
/// log2 of the miss interval after which the dfast skip step grows.
const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
/// Miss interval (derived) at which the dfast skip step is increased.
const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
/// Local miss run length that triggers dfast skipping.
const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
/// Ceiling for the adaptive dfast skip step.
const DFAST_MAX_SKIP_STEP: usize = 8;
/// Skip step used by dfast once input is judged incompressible.
const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
/// Default log2 size of the row matcher's hash table.
const ROW_HASH_BITS: usize = 20;
/// log2 of the number of entries per row in the row matcher.
const ROW_LOG: usize = 5;
/// Number of candidates examined per row search.
const ROW_SEARCH_DEPTH: usize = 16;
/// Row matcher "good enough" match length cutoff.
const ROW_TARGET_LEN: usize = 48;
/// Bits of hash retained as a per-entry tag for cheap row filtering.
const ROW_TAG_BITS: usize = 8;
/// Sentinel marking an unused row slot.
const ROW_EMPTY_SLOT: usize = usize::MAX;
/// Number of bytes hashed to select a row.
const ROW_HASH_KEY_LEN: usize = 4;
62
/// Hash-chain matcher: default log2 size of the head table.
const HC_HASH_LOG: usize = 20;
/// Hash-chain matcher: default log2 size of the chain table.
const HC_CHAIN_LOG: usize = 19;
/// Default number of chain links followed per search.
const HC_SEARCH_DEPTH: usize = 16;
/// Minimum match length accepted by the hash-chain matcher.
const HC_MIN_MATCH_LEN: usize = 5;
/// Hash-chain "good enough" match length cutoff.
const HC_TARGET_LEN: usize = 48;
/// Sentinel stored in empty hash/chain slots (position 0 is reserved).
const HC_EMPTY: u32 = 0;

/// Hard ceiling on any configured hash-chain search depth
/// (the largest `search_depth` in `LEVEL_TABLE` / `BEST_HC_CONFIG`).
const MAX_HC_SEARCH_DEPTH: usize = 32;
75
/// Which implementation of the common-prefix-length kernel to use.
///
/// With `std` this is detected once at runtime via CPU feature detection
/// (see `MatchGenerator::detect_prefix_kernel`); without `std` it is fixed
/// at compile time from `target_feature` cfgs.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PrefixKernel {
    /// Portable fallback implementation.
    Scalar,
    /// 16-byte SSE2 compare + movemask path.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Sse2,
    /// 32-byte AVX2 compare + movemask path.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Avx2,
    /// 16-byte NEON path (little-endian aarch64 only).
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    Aarch64Neon,
}
86
/// Size/effort parameters for the hash-chain matcher backend.
#[derive(Copy, Clone)]
struct HcConfig {
    // log2 size of the head table.
    hash_log: usize,
    // log2 size of the chain table.
    chain_log: usize,
    // Maximum chain links followed per search.
    search_depth: usize,
    // Stop searching once a match of this length is found.
    target_len: usize,
}
96
/// Size/effort parameters for the row-based matcher backend.
#[derive(Copy, Clone)]
struct RowConfig {
    // log2 size of the hash table.
    hash_bits: usize,
    // log2 number of entries per row.
    row_log: usize,
    // Maximum candidates examined per search.
    search_depth: usize,
    // Stop searching once a match of this length is found.
    target_len: usize,
}
104
/// Default hash-chain configuration, built from the `HC_*` constants.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};
111
/// Hash-chain configuration used by `CompressionLevel::Best`
/// (level 11 in `LEVEL_TABLE`): bigger tables, deeper search.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};
119
/// Default row-matcher configuration, built from the `ROW_*` constants.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
126
/// Fully-resolved parameters for one compression level.
#[derive(Copy, Clone)]
struct LevelParams {
    // Which matcher implementation drives this level.
    backend: MatcherBackend,
    // log2 of the maximum window size.
    window_log: u8,
    // Hash back-fill stride (only consumed by the `Simple` backend here).
    hash_fill_step: usize,
    // Lazy-matching depth for dfast/row/hash-chain backends.
    lazy_depth: u8,
    // Hash-chain parameters (ignored by other backends).
    hc: HcConfig,
    // Row-matcher parameters (ignored by other backends).
    row: RowConfig,
}
137
138fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
139 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
140 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
141}
142
143fn row_hash_bits_for_window(max_window_size: usize) -> usize {
144 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
145 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
146}
147
/// Per-level parameter table, indexed by `level - 1` for positive levels.
/// Fastest = level 1, Default = level 3, Better = level 7, Best = level 11.
/// Levels above 17 currently share the level-17 parameters.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    /* 1 */ LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 2 */ LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 3 */ LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 4 */ LevelParams { backend: MatcherBackend::Row, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
    /* 10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
    /* 11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    /* 12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
    /* 13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
    /* 14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /* 15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /* 16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /* 17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /* 18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /* 19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /* 20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /* 21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /* 22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
];
183
/// Absolute floor for any window log (and for per-window hash-bit clamps).
const MIN_WINDOW_LOG: u8 = 10;
/// Floor applied to the source-size-derived log when a size hint is given,
/// so tiny hints don't shrink tables below a useful size.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
192
/// Shrinks `params` when the (hinted) source size is smaller than the
/// level's default window, so tables aren't oversized for small inputs.
fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
    // ceil(log2(src_size)); subtracting 1 first makes exact powers of two
    // land on their own log (e.g. 1 MiB -> 20, not 21).
    let src_log = if src_size == 0 {
        MIN_WINDOW_LOG
    } else {
        (64 - (src_size - 1).leading_zeros()) as u8 };
    // Never go below the hinted floor (which itself is >= MIN_WINDOW_LOG).
    let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
    // Only ever shrink the window — never grow it past the level default.
    if src_log < params.window_log {
        params.window_log = src_log;
    }
    if params.backend == MatcherBackend::HashChain {
        // Keep the hash/chain tables proportionate to the source:
        // at most 4x (hash) / 2x (chain) the number of source positions.
        if (src_log + 2) < params.hc.hash_log as u8 {
            params.hc.hash_log = (src_log + 2) as usize;
        }
        if (src_log + 1) < params.hc.chain_log as u8 {
            params.hc.chain_log = (src_log + 1) as usize;
        }
    } else if params.backend == MatcherBackend::Row {
        // Row table size follows the (possibly shrunk) window directly.
        let max_window_size = 1usize << params.window_log;
        params.row.hash_bits = row_hash_bits_for_window(max_window_size);
    }
    params
}
232
/// Maps a public `CompressionLevel` to concrete matcher parameters,
/// then applies the source-size adjustment when a hint is available.
fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
    let params = match level {
        // "Uncompressed" still runs the simple matcher (dense fill, no
        // laziness); actual store/compress selection happens elsewhere.
        CompressionLevel::Uncompressed => LevelParams {
            backend: MatcherBackend::Simple,
            window_log: 17,
            hash_fill_step: 1,
            lazy_depth: 0,
            hc: HC_CONFIG,
            row: ROW_CONFIG,
        },
        CompressionLevel::Fastest => LEVEL_TABLE[0],
        CompressionLevel::Default => LEVEL_TABLE[2],
        CompressionLevel::Better => LEVEL_TABLE[6],
        CompressionLevel::Best => LEVEL_TABLE[10],
        CompressionLevel::Level(n) => {
            if n > 0 {
                // Positive levels index the table directly (1-based),
                // saturating at MAX_LEVEL.
                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
                LEVEL_TABLE[idx]
            } else if n == 0 {
                // Level 0 is an alias for the default level.
                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
            } else {
                // Negative levels trade ratio for speed: larger hash-fill
                // step the more negative the level, capped at 128.
                let acceleration =
                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
                let step = (acceleration + 3).min(128);
                LevelParams {
                    backend: MatcherBackend::Simple,
                    window_log: 17,
                    hash_fill_step: step,
                    lazy_depth: 0,
                    hc: HC_CONFIG,
                    row: ROW_CONFIG,
                }
            }
        }
    };
    if let Some(size) = source_size {
        adjust_params_for_source_size(params, size)
    } else {
        params
    }
}
280
/// The match-finding implementation driving a given compression level.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Suffix-store matcher (`MatchGenerator`): fastest levels.
    Simple,
    /// Double-fast matcher.
    Dfast,
    /// Row-based matcher.
    Row,
    /// Hash-chain matcher: higher levels.
    HashChain,
}
288
/// Front-end that owns the per-backend matchers, switches between them on
/// `reset`, and pools evicted buffers for reuse.
pub struct MatchGeneratorDriver {
    // Recycled data buffers shared by all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores (Simple backend only).
    suffix_pool: Vec<SuffixStore>,
    // Simple backend; always present.
    match_generator: MatchGenerator,
    // Lazily-created backends; `None` until first selected by reset().
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    // Backend currently selected by reset(); dispatch target everywhere.
    active_backend: MatcherBackend,
    // Current per-commit buffer size (base, capped by the window).
    slice_size: usize,
    base_slice_size: usize,
    // Window size advertised to callers via window_size().
    reported_window_size: usize,
    // Extra window bytes temporarily granted to keep primed dictionary
    // content resident; reclaimed as that content is evicted.
    dictionary_retained_budget: usize,
    // One-shot source-size hint consumed by the next reset().
    source_size_hint: Option<u64>,
}
309
impl MatchGeneratorDriver {
    /// Creates a driver whose default window is
    /// `max_slices_in_window * slice_size` bytes, starting on the
    /// `Simple` backend.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            row_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Resolves level parameters (thin wrapper over `resolve_level_params`).
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    // Accessors for the lazily-created backends. These panic if called
    // before reset() has selected (and therefore constructed) the backend;
    // all call sites are dispatched on `active_backend`, which reset() sets.

    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Gives back up to `evicted_bytes` of the dictionary window budget:
    /// as primed dictionary data is evicted, the temporarily enlarged
    /// `max_window_size` of the active backend shrinks back toward normal.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// After shrinking the window budget, repeatedly trims the active
    /// backend to its (now smaller) window; each round of evictions may
    /// retire more budget and allow further trimming, so loop until a
    /// round evicts nothing.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) only evicts; evicted buffers are scrubbed
                    // and returned to the pools.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }

    /// Indexes committed dictionary content without emitting sequences.
    /// Dictionary data should always be indexed densely, so the
    /// "incompressible" hint is forced to `false` (dfast uses its
    /// dedicated dense-skip entry point).
    fn skip_matching_for_dictionary_priming(&mut self) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.skip_matching_with_hint(Some(false)),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
            MatcherBackend::Row => self.row_matcher_mut().skip_matching_with_hint(Some(false)),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(Some(false)),
        }
    }
}
467
impl Matcher for MatchGeneratorDriver {
    /// All backends can be primed with dictionary content.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }

    /// Records a source-size hint; consumed (taken) by the next `reset`.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }

    /// Reconfigures the driver for `level`: resolves parameters (using a
    /// pending size hint, if any), tears down the previously active
    /// backend (recycling its buffers), then sets up the new one.
    fn reset(&mut self, level: CompressionLevel) {
        let hint = self.source_size_hint.take();
        let hinted = hint.is_some();
        let params = Self::level_params(level, hint);
        let max_window_size = 1usize << params.window_log;
        self.dictionary_retained_budget = 0;
        // When switching backends, drain the old backend's buffers into
        // the pools so the new backend can reuse them.
        if self.active_backend != params.backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::Row => {
                    if let Some(row) = self.row_match_generator.as_mut() {
                        // Drop the row tables outright; they are rebuilt
                        // on demand when this backend is selected again.
                        row.row_heads = Vec::new();
                        row.row_positions = Vec::new();
                        row.row_tags = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        row.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Drop the hash/chain tables outright; rebuilt on
                        // demand when this backend is selected again.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = params.backend;
        self.slice_size = self.base_slice_size.min(max_window_size);
        self.reported_window_size = max_window_size;
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = params.hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = params.lazy_depth;
                // The fast loop is only enabled for the default level and
                // its aliases (Level(0) maps to default, Level(3) is the
                // same table entry as Default).
                dfast.use_fast_loop = matches!(
                    level,
                    CompressionLevel::Default
                        | CompressionLevel::Level(0)
                        | CompressionLevel::Level(3)
                );
                // Only shrink the hash table when the caller hinted a
                // source size; otherwise keep the full-size table.
                dfast.set_hash_bits(if hinted {
                    dfast_hash_bits_for_window(max_window_size)
                } else {
                    DFAST_HASH_BITS
                });
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::Row => {
                let row = self
                    .row_match_generator
                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
                row.max_window_size = max_window_size;
                row.lazy_depth = params.lazy_depth;
                row.configure(params.row);
                if hinted {
                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
                }
                let vec_pool = &mut self.vec_pool;
                row.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = params.lazy_depth;
                hc.configure(params.hc);
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }

    /// Seeds the active backend with dictionary content and its repcode
    /// history. The window is temporarily enlarged by the amount of
    /// dictionary data actually committed (`dictionary_retained_budget`)
    /// so the dictionary stays referenceable; the budget is paid back as
    /// that data is later evicted (see `retire_dictionary_budget`).
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Optimistically grant a budget for the whole dictionary; any part
        // that ends up not committed is subtracted again below.
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // A tail shorter than the backend's minimum hashable length can't
        // be indexed, so don't bother committing it.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
        };
        // Feed the dictionary in slice_size chunks, indexing each chunk
        // without emitting sequences.
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching_for_dictionary_priming();
            committed_dict_budget += end - start;
            start = end;
        }

        // Undo the budget for any uncommitted tail.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Row => {
                    let matcher = self.row_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }

    /// The window size advertised to the frame header — the level's
    /// nominal window, not the temporarily dictionary-enlarged one.
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }

    /// Hands out a zero-initialized buffer of exactly `slice_size` bytes,
    /// reusing a pooled buffer when available.
    fn get_next_space(&mut self) -> Vec<u8> {
        if let Some(mut space) = self.vec_pool.pop() {
            if space.len() > self.slice_size {
                space.truncate(self.slice_size);
            }
            if space.len() < self.slice_size {
                space.resize(self.slice_size, 0);
            }
            return space;
        }
        alloc::vec![0; self.slice_size]
    }

    /// Returns the most recently committed buffer of the active backend.
    /// Panics if nothing has been committed yet (Simple backend unwraps
    /// the last window entry).
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
            MatcherBackend::Row => self.row_matcher().get_last_space(),
            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
        }
    }

    /// Appends `space` to the active backend's window. Buffers evicted to
    /// make room are scrubbed and pooled, and their byte count is used to
    /// pay down the dictionary window budget.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix store only if it is large enough
                // for this buffer; otherwise allocate a fresh one.
                let suffixes = match self.suffix_pool.pop() {
                    Some(store) if store.slots.len() >= space.len() => store,
                    _ => SuffixStore::with_capacity(space.len()),
                };
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Row => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.row_match_generator
                    .as_mut()
                    .expect("row backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }

    /// Runs match finding over the last committed buffer, emitting each
    /// produced sequence through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        match self.active_backend {
            MatcherBackend::Simple => {
                while self.match_generator.next_sequence(&mut handle_sequence) {}
            }
            MatcherBackend::Dfast => self
                .dfast_matcher_mut()
                .start_matching(&mut handle_sequence),
            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
        }
    }

    /// Indexes the last committed buffer without emitting sequences,
    /// letting each backend decide how densely to index.
    fn skip_matching(&mut self) {
        self.skip_matching_with_hint(None);
    }

    /// Like `skip_matching`, but with an optional caller judgement about
    /// whether the data looks incompressible (backends may index more
    /// sparsely when it does).
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        match self.active_backend {
            MatcherBackend::Simple => self
                .match_generator
                .skip_matching_with_hint(incompressible_hint),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(incompressible_hint),
            MatcherBackend::Row => self
                .row_matcher_mut()
                .skip_matching_with_hint(incompressible_hint),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(incompressible_hint),
        }
    }
}
826
/// Open-addressed (single-probe) hash table mapping a 5-byte suffix to
/// its most recent position in a window entry.
struct SuffixStore {
    // One slot per possible key; positions stored as `idx + 1` inside
    // `NonZeroUsize` so `Option` costs no extra space (niche-filled).
    slots: Vec<Option<NonZeroUsize>>,
    // floor(log2(slots.len())); number of hash bits kept by `key`.
    len_log: u32,
}
836
impl SuffixStore {
    /// Creates a store with `capacity` empty slots.
    /// `capacity` must be non-zero — `ilog2` panics on zero.
    fn with_capacity(capacity: usize) -> Self {
        Self {
            slots: alloc::vec![None; capacity],
            len_log: capacity.ilog2(),
        }
    }

    /// Records `idx` as the latest position for `suffix` (overwriting any
    /// previous occupant of the slot). `idx + 1` is stored so zero stays
    /// available as the `NonZeroUsize` niche.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// Whether the slot for `suffix` is occupied (possibly by a colliding
    /// suffix — callers must verify the match bytes).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Latest recorded position for `suffix`, if any (subject to hash
    /// collisions — callers must verify the match bytes).
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
    }

    /// Hashes the first five bytes of `suffix` into a slot index.
    /// `suffix` must be at least `MIN_MATCH_LEN` (5) bytes long.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        if self.len_log == 0 {
            // Single-slot table: every key maps to slot 0.
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        // Multiplicative hash: shift each byte into a distinct position,
        // multiply by an odd constant to mix, and XOR the results.
        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        // Keep the top len_log bits; the result is < 2^len_log, which is
        // <= slots.len(), so the modulo below is a defensive no-op.
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
889
/// One committed buffer in the simple matcher's window, together with its
/// suffix index and its absolute position in the concatenated window.
struct WindowEntry {
    // The committed bytes.
    data: Vec<u8>,
    // 5-byte-suffix -> position index for `data`.
    suffixes: SuffixStore,
    // Offset of the end of `data` measured from the end of the window
    // tail — used to turn an intra-entry index into a match offset.
    base_offset: usize,
}
899
/// The `Simple` backend: a windowed, suffix-store-based match generator.
pub(crate) struct MatchGenerator {
    // Byte budget for the whole window; oldest entries are evicted when
    // `window_size` would exceed it.
    max_window_size: usize,
    // Committed buffers, oldest first; matching runs over the last entry.
    window: Vec<WindowEntry>,
    // Total bytes currently held across all window entries.
    window_size: usize,
    // Debug-only mirror of the concatenated window, used to verify that
    // computed offsets point at identical bytes.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    // Cursor into the last entry: next position to consider for a match.
    suffix_idx: usize,
    // Start of the pending (not yet emitted) literal run in the last entry.
    last_idx_in_sequence: usize,
    // Stride for back-filling suffix entries inside matched regions.
    hash_fill_step: usize,
    // Recent-offset history (repcodes) carried across blocks.
    offset_hist: [u32; 3],
}
915
916impl MatchGenerator {
917 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
918 #[inline(always)]
919 const fn select_x86_prefix_kernel(has_avx2: bool, has_sse2: bool) -> PrefixKernel {
920 if has_avx2 {
921 return PrefixKernel::X86Avx2;
922 }
923 if has_sse2 {
924 return PrefixKernel::X86Sse2;
925 }
926 PrefixKernel::Scalar
927 }
928
    /// Detects the best prefix-compare kernel for this CPU, once, caching
    /// the result in a process-wide `OnceLock`.
    ///
    /// x86 feature detection is tried first; on aarch64 (little-endian)
    /// NEON is used when detected; otherwise the scalar fallback.
    #[cfg(feature = "std")]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        static KERNEL: OnceLock<PrefixKernel> = OnceLock::new();
        *KERNEL.get_or_init(|| {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                let kernel = Self::select_x86_prefix_kernel(
                    is_x86_feature_detected!("avx2"),
                    is_x86_feature_detected!("sse2"),
                );
                if kernel != PrefixKernel::Scalar {
                    return kernel;
                }
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            {
                if is_aarch64_feature_detected!("neon") {
                    return PrefixKernel::Aarch64Neon;
                }
            }
            PrefixKernel::Scalar
        })
    }
953
    /// no_std variant: no runtime CPU detection is available, so the
    /// kernel is fixed at compile time from `target_feature` cfgs.
    #[cfg(not(feature = "std"))]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            let kernel = Self::select_x86_prefix_kernel(
                cfg!(target_feature = "avx2"),
                cfg!(target_feature = "sse2"),
            );
            if kernel != PrefixKernel::Scalar {
                return kernel;
            }
        }
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        {
            if cfg!(target_feature = "neon") {
                return PrefixKernel::Aarch64Neon;
            }
        }
        PrefixKernel::Scalar
    }
975
976 #[inline(always)]
977 #[cfg(target_endian = "little")]
978 fn mismatch_byte_index(diff: usize) -> usize {
979 diff.trailing_zeros() as usize / 8
980 }
981
982 #[inline(always)]
983 #[cfg(target_endian = "big")]
984 fn mismatch_byte_index(diff: usize) -> usize {
985 diff.leading_zeros() as usize / 8
986 }
987
    /// Creates an empty generator with a `max_size`-byte window budget,
    /// dense hash fill, and the standard initial repcode history [1, 4, 8].
    fn new(max_size: usize) -> Self {
        Self {
            max_window_size: max_size,
            window: Vec::new(),
            window_size: 0,
            #[cfg(debug_assertions)]
            concat_window: Vec::new(),
            suffix_idx: 0,
            last_idx_in_sequence: 0,
            hash_fill_step: 1,
            offset_hist: [1, 4, 8],
        }
    }
1002
1003 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1004 self.window_size = 0;
1005 #[cfg(debug_assertions)]
1006 self.concat_window.clear();
1007 self.suffix_idx = 0;
1008 self.last_idx_in_sequence = 0;
1009 self.offset_hist = [1, 4, 8];
1010 self.window.drain(..).for_each(|entry| {
1011 reuse_space(entry.data, entry.suffixes);
1012 });
1013 }
1014
    /// Produces the next `Sequence` from the current (last) window entry and
    /// advances the cursor. Returns `true` if a sequence was emitted via
    /// `handle_sequence`, `false` once the entry is fully consumed.
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // Cursor past the end: flush pending literals, then report done.
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            let data_slice = &data_slice[self.suffix_idx..];
            // Too few bytes left to form a match key: emit the remaining tail
            // (from the last emitted position) as one literal run.
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // Key is the next MIN_MATCH_LEN bytes at the cursor.
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Start from a repeat-offset candidate, then let hash-table hits
            // from every window entry compete against it.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    if match_len >= MIN_MATCH_LEN {
                        // base_offset converts an entry-local index into a
                        // backwards distance from the current entry's start.
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // Debug cross-check of the offset arithmetic against
                        // the mirrored concatenated window.
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer longer matches; break length ties by the
                        // smaller (cheaper-to-encode) offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // Seed the hash table over the matched region before the
                // cursor jumps past it.
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repeat-offset history in sync with the encoder.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match here: record this position's key (first writer wins)
            // and advance one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
1122
    /// Length of the common prefix of `a` and `b`. Dispatches to the best
    /// available SIMD kernel for the bulk, then finishes the tail (and the
    /// scalar-only case) with `common_prefix_len_scalar`.
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        match Self::detect_prefix_kernel() {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Avx2 => {
                // SAFETY: kernel selection implies AVX2 is available; the
                // helper reads at most `max` bytes from each pointer.
                off = unsafe { Self::prefix_len_simd_avx2(lhs, rhs, max) };
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Sse2 => {
                // SAFETY: as above, with SSE2.
                off = unsafe { Self::prefix_len_simd_sse2(lhs, rhs, max) };
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            PrefixKernel::Aarch64Neon => {
                // SAFETY: as above, with NEON.
                off = unsafe { Self::prefix_len_simd_neon(lhs, rhs, max) };
            }
            PrefixKernel::Scalar => {}
        }

        Self::common_prefix_len_scalar(a, b, off, max)
    }
1149
1150 #[inline(always)]
1151 fn common_prefix_len_scalar(a: &[u8], b: &[u8], mut off: usize, max: usize) -> usize {
1152 let chunk = core::mem::size_of::<usize>();
1153 let lhs = a.as_ptr();
1154 let rhs = b.as_ptr();
1155 while off + chunk <= max {
1156 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
1157 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
1158 let diff = lhs_word ^ rhs_word;
1159 if diff != 0 {
1160 return off + Self::mismatch_byte_index(diff);
1161 }
1162 off += chunk;
1163 }
1164 off + core::iter::zip(&a[off..max], &b[off..max])
1165 .take_while(|(x, y)| x == y)
1166 .count()
1167 }
1168
    /// SSE2 bulk compare: 16 bytes per iteration. Returns the offset of the
    /// first mismatch, or the last multiple-of-16 offset processed (the
    /// scalar tail loop finishes the rest).
    ///
    /// # Safety
    /// SSE2 must be available and both pointers must be readable for at
    /// least `max` bytes.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse2")]
    unsafe fn prefix_len_simd_sse2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            // SAFETY: off + 16 <= max keeps both unaligned loads in bounds.
            let a: __m128i = unsafe { _mm_loadu_si128(lhs.add(off).cast::<__m128i>()) };
            let b: __m128i = unsafe { _mm_loadu_si128(rhs.add(off).cast::<__m128i>()) };
            let eq = _mm_cmpeq_epi8(a, b);
            // One mask bit per byte; bit set where the bytes were equal.
            let mask = _mm_movemask_epi8(eq) as u32;
            if mask != 0xFFFF {
                // First zero bit marks the first mismatching byte.
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 16;
        }
        off
    }
1185
    /// AVX2 bulk compare: 32 bytes per iteration. Returns the offset of the
    /// first mismatch, or the last multiple-of-32 offset processed.
    ///
    /// # Safety
    /// AVX2 must be available and both pointers must be readable for at
    /// least `max` bytes.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2")]
    unsafe fn prefix_len_simd_avx2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 32 <= max {
            // SAFETY: off + 32 <= max keeps both unaligned loads in bounds.
            let a: __m256i = unsafe { _mm256_loadu_si256(lhs.add(off).cast::<__m256i>()) };
            let b: __m256i = unsafe { _mm256_loadu_si256(rhs.add(off).cast::<__m256i>()) };
            let eq = _mm256_cmpeq_epi8(a, b);
            // One mask bit per byte; all 32 set means the chunks are equal.
            let mask = _mm256_movemask_epi8(eq) as u32;
            if mask != u32::MAX {
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 32;
        }
        off
    }
1202
    /// NEON bulk compare: 16 bytes per iteration, examined as two 64-bit
    /// lanes of the byte-equality vector. Little-endian only, so
    /// `mismatch_byte_index` (trailing zeros) maps to memory order.
    ///
    /// # Safety
    /// NEON must be available and both pointers must be readable for at
    /// least `max` bytes.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn prefix_len_simd_neon(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            // SAFETY: off + 16 <= max keeps both loads in bounds.
            let a: uint8x16_t = unsafe { vld1q_u8(lhs.add(off)) };
            let b: uint8x16_t = unsafe { vld1q_u8(rhs.add(off)) };
            // Equal bytes become 0xFF lanes; a fully-equal half is all-ones.
            let eq = vceqq_u8(a, b);
            let lanes = vreinterpretq_u64_u8(eq);
            let low = vgetq_lane_u64(lanes, 0);
            if low != u64::MAX {
                let diff = low ^ u64::MAX;
                return off + Self::mismatch_byte_index(diff as usize);
            }
            let high = vgetq_lane_u64(lanes, 1);
            if high != u64::MAX {
                let diff = high ^ u64::MAX;
                return off + 8 + Self::mismatch_byte_index(diff as usize);
            }
            off += 16;
        }
        off
    }
1226
    /// Inserts hash keys for positions in `[suffix_idx, idx)` of the current
    /// entry, stepping by `fill_step`, and always densely registers the final
    /// key ending at `idx` so the tail of a consumed match stays findable.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entry too small to hold even one key.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Last position whose MIN_MATCH_LEN-byte key still ends by `idx`.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                // Unrolled path specialized for the common fill step.
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Always register the key ending exactly at `idx`, regardless of the
        // stride above.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
1258
1259 #[inline(always)]
1260 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1261 debug_assert!(
1262 pos + MIN_MATCH_LEN <= data.len(),
1263 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1264 pos,
1265 MIN_MATCH_LEN,
1266 data.len()
1267 );
1268 let key = &data[pos..pos + MIN_MATCH_LEN];
1269 if !suffixes.contains_key(key) {
1270 suffixes.insert(key, pos);
1271 }
1272 }
1273
1274 #[inline(always)]
1275 fn add_suffixes_interleaved_fast(
1276 data: &[u8],
1277 suffixes: &mut SuffixStore,
1278 start: usize,
1279 insert_limit: usize,
1280 ) {
1281 let lane = FAST_HASH_FILL_STEP;
1282 let mut pos = start;
1283
1284 while pos + lane * 3 < insert_limit {
1287 let p0 = pos;
1288 let p1 = pos + lane;
1289 let p2 = pos + lane * 2;
1290 let p3 = pos + lane * 3;
1291
1292 Self::insert_suffix_if_absent(data, suffixes, p0);
1293 Self::insert_suffix_if_absent(data, suffixes, p1);
1294 Self::insert_suffix_if_absent(data, suffixes, p2);
1295 Self::insert_suffix_if_absent(data, suffixes, p3);
1296
1297 pos += lane * 4;
1298 }
1299
1300 while pos < insert_limit {
1301 Self::insert_suffix_if_absent(data, suffixes, pos);
1302 pos += lane;
1303 }
1304 }
1305
    /// Best repeat-offset candidate at the current cursor, or `None`.
    /// Only probed when there are no pending literals (`literals_len == 0`);
    /// the rep set is then {hist[1], hist[2], hist[0] - 1}, deduplicated.
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            // hist[0] - 1 only participates when it stays non-zero.
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Tiny fixed-size dedup set: the same offset is only probed once.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            // Longer match wins; equal lengths prefer the smaller offset.
            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }
1345
    /// Match length obtained by comparing `data_slice` against the bytes
    /// `offset` positions back in the window, walking older entries when the
    /// offset reaches past the current one. `None` if `offset` is invalid or
    /// points before the retained window.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes available behind the cursor across the whole window.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            // Offset lands inside the current entry.
            (last_idx, self.suffix_idx - remaining)
        } else {
            // Walk backwards through older entries until the offset lands.
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        // NOTE: the comparison cannot run past the matched entry's end, so a
        // match spanning an entry boundary is truncated there.
        Some(Self::common_prefix_len(match_slice, data_slice))
    }
1380
    /// Consumes the current entry without emitting sequences, only seeding
    /// the suffix table. When the caller hints the data is incompressible,
    /// most of the entry is seeded sparsely and only the tail densely.
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        let len = self.window.last().unwrap().data.len();
        if incompressible_hint == Some(true) {
            // Sparse fill up to a dense tail region; the tail stays dense so
            // matches that straddle into the next entry remain findable.
            let dense_tail = MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let sparse_end = len.saturating_sub(dense_tail);
            self.add_suffixes_till(sparse_end, INCOMPRESSIBLE_SKIP_STEP);
            // Move the fill cursor so the dense pass starts at sparse_end.
            self.suffix_idx = sparse_end;
            self.add_suffixes_till(len, 1);
        } else {
            self.add_suffixes_till(len, 1);
        }
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }
1400
    /// Test-only convenience wrapper: skip matching with no hint.
    #[cfg(test)]
    fn skip_matching(&mut self) {
        self.skip_matching_with_hint(None);
    }
1406
    /// Appends a new entry to the window and resets the cursor to its start.
    /// The previous entry must have been fully consumed.
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        // Evict old entries first so the new data fits in the window budget.
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        // Shift every existing entry's base_offset by the (soon-to-be
        // previous) last entry's length, keeping base_offset equal to the
        // distance from that entry's start back from the newest entry's start.
        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            base_offset: 0,
        });
        self.window_size += len;
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }
1438
    /// Evicts entries from the front of the window until `amount` more bytes
    /// fit within `max_window_size`, recycling buffers via `reuse_space`.
    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        assert!(self.max_window_size >= amount);
        while self.window_size + amount > self.max_window_size {
            // NOTE(review): `Vec::remove(0)` shifts the remaining entries —
            // O(n) in the entry count. Acceptable while the window holds few
            // entries; a VecDeque would make eviction O(1).
            let removed = self.window.remove(0);
            self.window_size -= removed.data.len();
            #[cfg(debug_assertions)]
            self.concat_window.drain(0..removed.data.len());

            let WindowEntry {
                suffixes,
                data: leaked_vec,
                base_offset: _,
            } = removed;
            reuse_space(leaked_vec, suffixes);
        }
    }
1457}
1458
/// "Double-fast" match generator: two hash tables keyed on 4-byte (short)
/// and 8-byte (long) prefixes over a sliding window of input blocks.
struct DfastMatchGenerator {
    /// Upper bound on the total bytes retained in `window`.
    max_window_size: usize,
    /// Retained input blocks, oldest at the front.
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all blocks in `window`.
    window_size: usize,
    /// Concatenation of the window blocks; may keep a dead prefix that is
    /// reclaimed lazily (see `compact_history`).
    history: Vec<u8>,
    /// Index into `history` where the live bytes begin.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// Recent offsets used for repeat-offset (repcode) candidates.
    offset_hist: [u32; 3],
    /// Short-key buckets; slot 0 holds the most recent position.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// Long-key buckets; slot 0 holds the most recent position.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// log2 of the hash table length (see `set_hash_bits`).
    hash_bits: usize,
    /// Selects the unrolled fast loop in `start_matching`.
    use_fast_loop: bool,
    /// Lazy-match probe depth; values >= 2 enable the second lookahead
    /// probe in `pick_lazy_match_shared`.
    lazy_depth: u8,
}
1476
/// A proposed match: `match_len` bytes at absolute position `start`,
/// copied from `offset` bytes further back.
#[derive(Copy, Clone)]
struct MatchCandidate {
    // Absolute stream position where the match begins.
    start: usize,
    // Backwards distance to the matched bytes.
    offset: usize,
    // Number of matching bytes.
    match_len: usize,
}
1483
1484fn best_len_offset_candidate(
1485 lhs: Option<MatchCandidate>,
1486 rhs: Option<MatchCandidate>,
1487) -> Option<MatchCandidate> {
1488 match (lhs, rhs) {
1489 (None, other) | (other, None) => other,
1490 (Some(lhs), Some(rhs)) => {
1491 if rhs.match_len > lhs.match_len
1492 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1493 {
1494 Some(rhs)
1495 } else {
1496 Some(lhs)
1497 }
1498 }
1499 }
1500}
1501
/// Grows a match backwards over the pending literals: while the byte before
/// the match equals the byte before the current position, both move back one
/// and the match lengthens. Bounded by `lit_len` (cannot consume emitted
/// output) and by the start of retained history.
fn extend_backwards_shared(
    concat: &[u8],
    history_abs_start: usize,
    mut candidate_pos: usize,
    mut abs_pos: usize,
    mut match_len: usize,
    lit_len: usize,
) -> MatchCandidate {
    // Earliest position the match start may retreat to.
    let min_abs_pos = abs_pos - lit_len;
    while abs_pos > min_abs_pos
        && candidate_pos > history_abs_start
        && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
    {
        candidate_pos -= 1;
        abs_pos -= 1;
        match_len += 1;
    }
    MatchCandidate {
        start: abs_pos,
        offset: abs_pos - candidate_pos,
        match_len,
    }
}
1525
/// Best repeat-offset candidate at `abs_pos`, or `None`. With no pending
/// literals the rep set is {hist[1], hist[2], hist[0] - 1}; otherwise the
/// plain history {hist[0], hist[1], hist[2]} is probed.
fn repcode_candidate_shared(
    concat: &[u8],
    history_abs_start: usize,
    offset_hist: [u32; 3],
    abs_pos: usize,
    lit_len: usize,
    min_match_len: usize,
) -> Option<MatchCandidate> {
    let reps = if lit_len == 0 {
        [
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
            // hist[0] - 1 only participates when it stays non-zero.
            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
        ]
    } else {
        [
            Some(offset_hist[0] as usize),
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
        ]
    };

    let current_idx = abs_pos - history_abs_start;
    // Too close to the end of history to form a minimum-length match.
    if current_idx + min_match_len > concat.len() {
        return None;
    }

    let mut best = None;
    for rep in reps.into_iter().flatten() {
        // Reject offsets that reach before the stream start or the window.
        if rep == 0 || rep > abs_pos {
            continue;
        }
        let candidate_pos = abs_pos - rep;
        if candidate_pos < history_abs_start {
            continue;
        }
        let candidate_idx = candidate_pos - history_abs_start;
        let match_len =
            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
        if match_len >= min_match_len {
            // Pull the match start back over the pending literals.
            let candidate = extend_backwards_shared(
                concat,
                history_abs_start,
                candidate_pos,
                abs_pos,
                match_len,
                lit_len,
            );
            best = best_len_offset_candidate(best, Some(candidate));
        }
    }
    best
}
1579
/// Tuning knobs for `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    // Matches at least this long are taken immediately (no lazy probe).
    target_len: usize,
    // Minimum acceptable match length for the lookahead probes.
    min_match_len: usize,
    // Number of lookahead probes; >= 2 enables the second probe.
    lazy_depth: u8,
    // Absolute end of retained history; probes must not read past it.
    history_abs_end: usize,
}
1587
/// Lazy-match arbitration: decide whether to take `best` now, or defer
/// because a strictly better match begins one (or, with `lazy_depth >= 2`,
/// two) positions later. Returns `None` to tell the caller to advance and
/// retry rather than emit `best`.
fn pick_lazy_match_shared(
    abs_pos: usize,
    lit_len: usize,
    best: Option<MatchCandidate>,
    config: LazyMatchConfig,
    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
) -> Option<MatchCandidate> {
    let best = best?;
    // Long enough already, or no room to probe ahead: take it.
    if best.match_len >= config.target_len
        || abs_pos + 1 + config.min_match_len > config.history_abs_end
    {
        return Some(best);
    }

    // Probe one position ahead; a longer (or same-length, closer) match
    // there means deferring is profitable.
    let next = best_match_at(abs_pos + 1, lit_len + 1);
    if let Some(next) = next
        && (next.match_len > best.match_len
            || (next.match_len == best.match_len && next.offset < best.offset))
    {
        return None;
    }

    // Optional second probe: only defer for a clearly longer match (+2),
    // since two extra literals must be paid for.
    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
        if let Some(next2) = next2
            && next2.match_len > best.match_len + 1
        {
            return None;
        }
    }

    Some(best)
}
1621
1622impl DfastMatchGenerator {
    /// Length of the densely-seeded region kept around block boundaries so
    /// matches that straddle a boundary remain findable.
    const BOUNDARY_DENSE_TAIL_LEN: usize = DFAST_MIN_MATCH_LEN + 3;
1630
    /// Creates an empty generator; hash tables are allocated lazily by
    /// `ensure_hash_tables` on first use.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat-offset history (1, 4, 8).
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            use_fast_loop: false,
            lazy_depth: 1,
        }
    }
1647
1648 fn set_hash_bits(&mut self, bits: usize) {
1649 let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1650 if self.hash_bits != clamped {
1651 self.hash_bits = clamped;
1652 self.short_hash = Vec::new();
1653 self.long_hash = Vec::new();
1654 }
1655 }
1656
    /// Returns the generator to a fresh-stream state, clearing history and
    /// hash tables and recycling every window buffer via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        // Cheaper to refill in place than reallocate the (large) tables.
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // Grow back to full capacity before recycling — presumably the
            // reuse pool hands out buffers by length; confirm against the
            // `reuse_space` consumers.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
1672
    /// The most recently added block. Panics if the window is empty.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
1676
1677 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1678 assert!(data.len() <= self.max_window_size);
1679 while self.window_size + data.len() > self.max_window_size {
1680 let removed = self.window.pop_front().unwrap();
1681 self.window_size -= removed.len();
1682 self.history_start += removed.len();
1683 self.history_abs_start += removed.len();
1684 reuse_space(removed);
1685 }
1686 self.compact_history();
1687 self.history.extend_from_slice(&data);
1688 self.window_size += data.len();
1689 self.window.push_back(data);
1690 }
1691
1692 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1693 while self.window_size > self.max_window_size {
1694 let removed = self.window.pop_front().unwrap();
1695 self.window_size -= removed.len();
1696 self.history_start += removed.len();
1697 self.history_abs_start += removed.len();
1698 reuse_space(removed);
1699 }
1700 }
1701
    /// Consumes the current block without emitting sequences, only seeding
    /// the hash tables. Incompressible-looking blocks are seeded sparsely,
    /// with dense seeding kept at both boundaries.
    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        // Backfill the tail of the previous block so boundary-spanning
        // matches stay findable (insert_positions clamps to live history).
        let tail_start = current_abs_start.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
        if tail_start < current_abs_start {
            self.insert_positions(tail_start, current_abs_start);
        }

        // Without a hint, decide sparsity from the block's own statistics.
        let used_sparse = incompressible_hint
            .unwrap_or_else(|| self.block_looks_incompressible(current_abs_start, current_abs_end));
        if used_sparse {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                DFAST_INCOMPRESSIBLE_SKIP_STEP,
            );
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }

        // After sparse seeding, re-seed the block's own tail densely.
        if used_sparse {
            let tail_start = current_abs_end
                .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
                .max(current_abs_start);
            if tail_start < current_abs_end {
                self.insert_positions(tail_start, current_abs_end);
            }
        }
    }
1735
1736 fn skip_matching_dense(&mut self) {
1737 self.ensure_hash_tables();
1738 let current_len = self.window.back().unwrap().len();
1739 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1740 let current_abs_end = current_abs_start + current_len;
1741 let backfill_start = current_abs_start
1742 .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
1743 .max(self.history_abs_start);
1744 if backfill_start < current_abs_start {
1745 self.insert_positions(backfill_start, current_abs_start);
1746 }
1747 self.insert_positions(current_abs_start, current_abs_end);
1748 }
1749
1750 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1751 self.ensure_hash_tables();
1752
1753 let current_len = self.window.back().unwrap().len();
1754 if current_len == 0 {
1755 return;
1756 }
1757
1758 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1759 if self.use_fast_loop {
1760 self.start_matching_fast_loop(current_abs_start, current_len, &mut handle_sequence);
1761 return;
1762 }
1763 self.start_matching_general(current_abs_start, current_len, &mut handle_sequence);
1764 }
1765
    /// General matching loop: per position, find the best repcode/hash match
    /// with lazy evaluation, and grow an adaptive skip step over stretches
    /// that produce no matches.
    fn start_matching_general(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        // Blocks that look incompressible may grow the skip step to its cap.
        let use_adaptive_skip =
            self.block_looks_incompressible(current_abs_start, current_abs_start + current_len);
        let mut pos = 0usize;
        let mut literals_start = 0usize;
        let mut skip_step = 1usize;
        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
        // Consecutive match-less positions; triggers local skipping.
        let mut miss_run = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            // `pick_lazy_match` returns None when deferring looks better;
            // that falls through to the miss path below.
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                let start = self.emit_candidate(
                    current_abs_start,
                    &mut literals_start,
                    candidate,
                    handle_sequence,
                );
                pos = start + candidate.match_len;
                // A hit resets all skip heuristics.
                skip_step = 1;
                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                miss_run = 0;
            } else {
                self.insert_position(abs_pos);
                miss_run = miss_run.saturating_add(1);
                let use_local_adaptive_skip = miss_run >= DFAST_LOCAL_SKIP_TRIGGER;
                if use_adaptive_skip || use_local_adaptive_skip {
                    // Locally-triggered skipping is capped much lower than
                    // the whole-block incompressible mode.
                    let skip_cap = if use_adaptive_skip {
                        DFAST_MAX_SKIP_STEP
                    } else {
                        2
                    };
                    if pos >= next_skip_growth_pos {
                        skip_step = (skip_step + 1).min(skip_cap);
                        next_skip_growth_pos =
                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    }
                    pos = pos.saturating_add(skip_step);
                } else {
                    pos += 1;
                }
            }
        }

        // Make the unscanned tail findable for later blocks, then flush any
        // remaining literals.
        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
        self.emit_trailing_literals(literals_start, handle_sequence);
    }
1820
    /// Unrolled matching loop: checks a repcode two positions ahead first,
    /// then the full search at the current position; on a miss it seeds up
    /// to four consecutive positions before advancing.
    fn start_matching_fast_loop(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        let block_is_strict_incompressible = self
            .block_looks_incompressible_strict(current_abs_start, current_abs_start + current_len);
        let mut pos = 0usize;
        let mut literals_start = 0usize;
        let mut skip_step = 1usize;
        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
        let mut miss_run = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let ip0 = pos;
            let ip1 = ip0.saturating_add(1);
            let ip2 = ip0.saturating_add(2);
            let ip3 = ip0.saturating_add(3);

            let abs_ip0 = current_abs_start + ip0;
            let lit_len_ip0 = ip0 - literals_start;

            // Cheap probe: a repcode hit at ip0+2 is taken immediately,
            // provided its (backwards-extended) start stays within the
            // still-unemitted region.
            if ip2 + DFAST_MIN_MATCH_LEN <= current_len {
                let abs_ip2 = current_abs_start + ip2;
                let lit_len_ip2 = ip2 - literals_start;
                if let Some(rep) = self.repcode_candidate(abs_ip2, lit_len_ip2)
                    && rep.start >= current_abs_start + literals_start
                    && rep.start <= abs_ip2
                {
                    let start = self.emit_candidate(
                        current_abs_start,
                        &mut literals_start,
                        rep,
                        handle_sequence,
                    );
                    pos = start + rep.match_len;
                    skip_step = 1;
                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    miss_run = 0;
                    continue;
                }
            }

            // Full search at ip0 (no lazy arbitration in the fast loop).
            let best = self.best_match(abs_ip0, lit_len_ip0);
            if let Some(candidate) = best {
                let start = self.emit_candidate(
                    current_abs_start,
                    &mut literals_start,
                    candidate,
                    handle_sequence,
                );
                pos = start + candidate.match_len;
                skip_step = 1;
                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                miss_run = 0;
            } else {
                // Miss: seed ip0..=ip3 (where a 4-byte key still fits) so the
                // tables stay dense even if the skip step grows.
                self.insert_position(abs_ip0);
                if ip1 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip1);
                }
                if ip2 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip2);
                }
                if ip3 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip3);
                }
                miss_run = miss_run.saturating_add(1);
                if block_is_strict_incompressible || miss_run >= DFAST_LOCAL_SKIP_TRIGGER {
                    let skip_cap = DFAST_MAX_SKIP_STEP;
                    if pos >= next_skip_growth_pos {
                        skip_step = (skip_step + 1).min(skip_cap);
                        next_skip_growth_pos =
                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    }
                    pos = pos.saturating_add(skip_step);
                } else {
                    skip_step = 1;
                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    pos += 1;
                }
            }
        }

        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
        self.emit_trailing_literals(literals_start, handle_sequence);
    }
1907
1908 fn seed_remaining_hashable_starts(
1909 &mut self,
1910 current_abs_start: usize,
1911 current_len: usize,
1912 pos: usize,
1913 ) {
1914 let boundary_tail_start = current_len.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
1915 let mut seed_pos = pos.min(current_len).min(boundary_tail_start);
1916 while seed_pos + DFAST_SHORT_HASH_LOOKAHEAD <= current_len {
1917 self.insert_position(current_abs_start + seed_pos);
1918 seed_pos += 1;
1919 }
1920 }
1921
    /// Emits `candidate` as a `Sequence::Triple`, seeding hash positions over
    /// the consumed region and updating the repeat-offset history. Returns
    /// the candidate's start relative to the current block and advances
    /// `literals_start` past the match.
    fn emit_candidate(
        &mut self,
        current_abs_start: usize,
        literals_start: &mut usize,
        candidate: MatchCandidate,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) -> usize {
        // Seed from the pending literals through the end of the match, so
        // later searches can land inside this region.
        self.insert_positions(
            current_abs_start + *literals_start,
            candidate.start + candidate.match_len,
        );
        let current = self.window.back().unwrap().as_slice();
        let start = candidate.start - current_abs_start;
        let literals = &current[*literals_start..start];
        handle_sequence(Sequence::Triple {
            literals,
            offset: candidate.offset,
            match_len: candidate.match_len,
        });
        // Keep repcode history in sync with what the encoder will record.
        let _ = encode_offset_with_history(
            candidate.offset as u32,
            literals.len() as u32,
            &mut self.offset_hist,
        );
        *literals_start = start + candidate.match_len;
        start
    }
1949
1950 fn emit_trailing_literals(
1951 &self,
1952 literals_start: usize,
1953 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1954 ) {
1955 if literals_start < self.window.back().unwrap().len() {
1956 let current = self.window.back().unwrap().as_slice();
1957 handle_sequence(Sequence::Literals {
1958 literals: ¤t[literals_start..],
1959 });
1960 }
1961 }
1962
1963 fn ensure_hash_tables(&mut self) {
1964 let table_len = 1usize << self.hash_bits;
1965 if self.short_hash.len() != table_len {
1966 self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1970 self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1971 }
1972 }
1973
    /// Reclaims the dead prefix of `history` when it has grown large enough
    /// to be worth the memmove: either it exceeds the window budget or it
    /// makes up at least half of the buffer.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }
1985
    /// The live (non-evicted) portion of the concatenated history.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
1989
    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
1993
1994 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1995 let rep = self.repcode_candidate(abs_pos, lit_len);
1996 let hash = self.hash_candidate(abs_pos, lit_len);
1997 best_len_offset_candidate(rep, hash)
1998 }
1999
2000 fn pick_lazy_match(
2001 &self,
2002 abs_pos: usize,
2003 lit_len: usize,
2004 best: Option<MatchCandidate>,
2005 ) -> Option<MatchCandidate> {
2006 pick_lazy_match_shared(
2007 abs_pos,
2008 lit_len,
2009 best,
2010 LazyMatchConfig {
2011 target_len: DFAST_TARGET_LEN,
2012 min_match_len: DFAST_MIN_MATCH_LEN,
2013 lazy_depth: self.lazy_depth,
2014 history_abs_end: self.history_abs_end(),
2015 },
2016 |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2017 )
2018 }
2019
    /// Repeat-offset probe at `abs_pos`, delegating to the shared helper
    /// with this generator's history and offset state.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            DFAST_MIN_MATCH_LEN,
        )
    }
2030
2031 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2032 let concat = self.live_history();
2033 let current_idx = abs_pos - self.history_abs_start;
2034 let mut best = None;
2035 for candidate_pos in self.long_candidates(abs_pos) {
2036 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
2037 continue;
2038 }
2039 let candidate_idx = candidate_pos - self.history_abs_start;
2040 let match_len =
2041 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2042 if match_len >= DFAST_MIN_MATCH_LEN {
2043 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2044 best = best_len_offset_candidate(best, Some(candidate));
2045 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
2046 return best;
2047 }
2048 }
2049 }
2050
2051 for candidate_pos in self.short_candidates(abs_pos) {
2052 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
2053 continue;
2054 }
2055 let candidate_idx = candidate_pos - self.history_abs_start;
2056 let match_len =
2057 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2058 if match_len >= DFAST_MIN_MATCH_LEN {
2059 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2060 best = best_len_offset_candidate(best, Some(candidate));
2061 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
2062 return best;
2063 }
2064 }
2065 }
2066 best
2067 }
2068
    /// Grows a match backwards over pending literals; see
    /// `extend_backwards_shared` for the bounds.
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }
2085
2086 fn insert_positions(&mut self, start: usize, end: usize) {
2087 let start = start.max(self.history_abs_start);
2088 let end = end.min(self.history_abs_end());
2089 for pos in start..end {
2090 self.insert_position(pos);
2091 }
2092 }
2093
2094 fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2095 let start = start.max(self.history_abs_start);
2096 let end = end.min(self.history_abs_end());
2097 if step <= 1 {
2098 self.insert_positions(start, end);
2099 return;
2100 }
2101 let mut pos = start;
2102 while pos < end {
2103 self.insert_position(pos);
2104 pos = pos.saturating_add(step);
2105 }
2106 }
2107
    /// Record `pos` in both the short (4-byte) and long (8-byte) hash tables.
    ///
    /// Each bucket acts as a small MRU array: a new entry is written at slot 0
    /// after shifting older entries toward the tail, unless slot 0 already
    /// holds `pos`.
    fn insert_position(&mut self, pos: usize) {
        let idx = pos - self.history_abs_start;
        // Hash inside a short-lived borrow of the history so the mutable
        // bucket update below does not conflict with the immutable borrow.
        let short = {
            let concat = self.live_history();
            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
        };
        if let Some(short) = short {
            let bucket = &mut self.short_hash[short];
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }

        let long = {
            let concat = self.live_history();
            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
        };
        if let Some(long) = long {
            let bucket = &mut self.long_hash[long];
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }
    }
2134
2135 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
2136 let concat = self.live_history();
2137 let idx = pos - self.history_abs_start;
2138 (idx + 4 <= concat.len())
2139 .then(|| self.short_hash[self.hash4(&concat[idx..])])
2140 .into_iter()
2141 .flatten()
2142 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
2143 }
2144
2145 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
2146 let concat = self.live_history();
2147 let idx = pos - self.history_abs_start;
2148 (idx + 8 <= concat.len())
2149 .then(|| self.long_hash[self.hash8(&concat[idx..])])
2150 .into_iter()
2151 .flatten()
2152 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
2153 }
2154
2155 fn hash4(&self, data: &[u8]) -> usize {
2156 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2157 self.hash_index(value)
2158 }
2159
2160 fn hash8(&self, data: &[u8]) -> usize {
2161 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
2162 self.hash_index(value)
2163 }
2164
2165 fn block_looks_incompressible(&self, start: usize, end: usize) -> bool {
2166 let live = self.live_history();
2167 if start >= end || start < self.history_abs_start {
2168 return false;
2169 }
2170 let start_idx = start - self.history_abs_start;
2171 let end_idx = end - self.history_abs_start;
2172 if end_idx > live.len() {
2173 return false;
2174 }
2175 let block = &live[start_idx..end_idx];
2176 block_looks_incompressible(block)
2177 }
2178
2179 fn block_looks_incompressible_strict(&self, start: usize, end: usize) -> bool {
2180 let live = self.live_history();
2181 if start >= end || start < self.history_abs_start {
2182 return false;
2183 }
2184 let start_idx = start - self.history_abs_start;
2185 let end_idx = end - self.history_abs_start;
2186 if end_idx > live.len() {
2187 return false;
2188 }
2189 let block = &live[start_idx..end_idx];
2190 block_looks_incompressible_strict(block)
2191 }
2192
2193 fn hash_index(&self, value: u64) -> usize {
2194 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2195 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
2196 }
2197}
2198
/// Row-hash based matcher: positions are bucketed into fixed-size rows
/// addressed by a hash, with a one-byte tag per entry to cheaply reject most
/// non-matches before any byte comparison.
struct RowMatchGenerator {
    // Hard cap on the number of history bytes kept for matching.
    max_window_size: usize,
    // Blocks currently contributing to the window, oldest first.
    window: VecDeque<Vec<u8>>,
    // Total bytes across `window`.
    window_size: usize,
    // Concatenated history bytes; only `history[history_start..]` is live.
    history: Vec<u8>,
    // Dead (evicted) prefix length at the front of `history`.
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // Three most recent match offsets (repeat-offset history).
    offset_hist: [u32; 3],
    // log2 of the number of rows.
    row_hash_log: usize,
    // log2 of the number of entries per row.
    row_log: usize,
    // Maximum entries walked per row lookup.
    search_depth: usize,
    // Match length at which a search stops early.
    target_len: usize,
    // How many positions ahead the lazy-match heuristic may look.
    lazy_depth: u8,
    // Per-row index of the most recently written slot.
    row_heads: Vec<u8>,
    // Absolute positions, one row's entries contiguous (`ROW_EMPTY_SLOT` = vacant).
    row_positions: Vec<usize>,
    // One-byte hash tags mirroring `row_positions`.
    row_tags: Vec<u8>,
}
2216
2217impl RowMatchGenerator {
2218 fn new(max_window_size: usize) -> Self {
2219 Self {
2220 max_window_size,
2221 window: VecDeque::new(),
2222 window_size: 0,
2223 history: Vec::new(),
2224 history_start: 0,
2225 history_abs_start: 0,
2226 offset_hist: [1, 4, 8],
2227 row_hash_log: ROW_HASH_BITS - ROW_LOG,
2228 row_log: ROW_LOG,
2229 search_depth: ROW_SEARCH_DEPTH,
2230 target_len: ROW_TARGET_LEN,
2231 lazy_depth: 1,
2232 row_heads: Vec::new(),
2233 row_positions: Vec::new(),
2234 row_tags: Vec::new(),
2235 }
2236 }
2237
    /// Resize the total hash space (rows × entries) to roughly `bits` bits.
    ///
    /// `bits` is clamped to keep at least one row-selection bit and at most
    /// `ROW_HASH_BITS` in total. A changed row count invalidates the tables,
    /// so they are cleared here and lazily rebuilt by `ensure_tables`.
    fn set_hash_bits(&mut self, bits: usize) {
        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
        let row_hash_log = clamped.saturating_sub(self.row_log);
        if self.row_hash_log != row_hash_log {
            self.row_hash_log = row_hash_log;
            self.row_heads.clear();
            self.row_positions.clear();
            self.row_tags.clear();
        }
    }
2248
    /// Apply level-dependent tuning.
    ///
    /// `row_log` is constrained to 4..=6, and the hash geometry is re-derived
    /// afterwards because `set_hash_bits` depends on the updated `row_log`.
    fn configure(&mut self, config: RowConfig) {
        self.row_log = config.row_log.clamp(4, 6);
        self.search_depth = config.search_depth;
        self.target_len = config.target_len;
        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
    }
2255
2256 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2257 self.window_size = 0;
2258 self.history.clear();
2259 self.history_start = 0;
2260 self.history_abs_start = 0;
2261 self.offset_hist = [1, 4, 8];
2262 self.row_heads.fill(0);
2263 self.row_positions.fill(ROW_EMPTY_SLOT);
2264 self.row_tags.fill(0);
2265 for mut data in self.window.drain(..) {
2266 data.resize(data.capacity(), 0);
2267 reuse_space(data);
2268 }
2269 }
2270
2271 fn get_last_space(&self) -> &[u8] {
2272 self.window.back().unwrap().as_slice()
2273 }
2274
    /// Append a block to the window, evicting the oldest blocks as needed so
    /// the total stays within `max_window_size`; evicted buffers are recycled
    /// through `reuse_space`. The new block itself must fit the window.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        // Reclaim the dead prefix before growing the history buffer.
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
2289
2290 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2291 while self.window_size > self.max_window_size {
2292 let removed = self.window.pop_front().unwrap();
2293 self.window_size -= removed.len();
2294 self.history_start += removed.len();
2295 self.history_abs_start += removed.len();
2296 reuse_space(removed);
2297 }
2298 }
2299
    /// Index the newest block without emitting any sequences.
    ///
    /// Blocks hinted incompressible are sampled every
    /// `INCOMPRESSIBLE_SKIP_STEP` positions, except for a densely indexed tail
    /// so matches can still start just before the next block boundary.
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        // Positions straddling the previous block boundary only became
        // hashable once this block's bytes arrived; insert them now.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        if incompressible_hint == Some(true) {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                INCOMPRESSIBLE_SKIP_STEP,
            );
            let dense_tail = ROW_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let tail_start = current_abs_end
                .saturating_sub(dense_tail)
                .max(current_abs_start);
            for pos in tail_start..current_abs_end {
                // Skip positions the strided pass above already inserted.
                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
                    self.insert_position(pos);
                }
            }
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }
    }
2328
    /// Run the matcher over the newest block, emitting `Sequence`s through
    /// `handle_sequence`.
    ///
    /// Greedy scan with lazy evaluation: for each position the best candidate
    /// is found, `pick_lazy_match` may defer it in favor of a later match,
    /// and accepted matches index all covered positions before advancing.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        // Index the last few positions of the previous block that only now
        // have enough following bytes to hash.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + ROW_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // The match may have been extended backwards into the pending
                // literals, so `candidate.start` can precede `abs_pos`.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repeat-offset history in sync with what was emitted.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index the tail that is too short to start a match but still hashable.
        while pos + ROW_HASH_KEY_LEN <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2384
2385 fn ensure_tables(&mut self) {
2386 let row_count = 1usize << self.row_hash_log;
2387 let row_entries = 1usize << self.row_log;
2388 let total = row_count * row_entries;
2389 if self.row_positions.len() != total {
2390 self.row_heads = alloc::vec![0; row_count];
2391 self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
2392 self.row_tags = alloc::vec![0; total];
2393 }
2394 }
2395
2396 fn compact_history(&mut self) {
2397 if self.history_start == 0 {
2398 return;
2399 }
2400 if self.history_start >= self.max_window_size
2401 || self.history_start * 2 >= self.history.len()
2402 {
2403 self.history.drain(..self.history_start);
2404 self.history_start = 0;
2405 }
2406 }
2407
    /// The still-addressable suffix of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
2411
    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
2415
    /// Hash the `ROW_HASH_KEY_LEN` bytes at `abs_pos` into a `(row, tag)`
    /// pair, or `None` when too few bytes of history remain to read a key.
    ///
    /// The top `row_hash_log + ROW_TAG_BITS` bits of a multiplicative hash are
    /// split: the high part selects the row, the low `ROW_TAG_BITS` bits
    /// become the per-entry tag used for cheap rejection.
    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + ROW_HASH_KEY_LEN > concat.len() {
            return None;
        }
        let value =
            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        let hash = value.wrapping_mul(PRIME);
        let total_bits = self.row_hash_log + ROW_TAG_BITS;
        let combined = hash >> (u64::BITS as usize - total_bits);
        let row_mask = (1usize << self.row_hash_log) - 1;
        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
        let tag = combined as u8;
        Some((row, tag))
    }
2433
2434 fn backfill_start(&self, current_abs_start: usize) -> usize {
2435 current_abs_start
2436 .saturating_sub(ROW_HASH_KEY_LEN - 1)
2437 .max(self.history_abs_start)
2438 }
2439
2440 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2441 let rep = self.repcode_candidate(abs_pos, lit_len);
2442 let row = self.row_candidate(abs_pos, lit_len);
2443 best_len_offset_candidate(rep, row)
2444 }
2445
2446 fn pick_lazy_match(
2447 &self,
2448 abs_pos: usize,
2449 lit_len: usize,
2450 best: Option<MatchCandidate>,
2451 ) -> Option<MatchCandidate> {
2452 pick_lazy_match_shared(
2453 abs_pos,
2454 lit_len,
2455 best,
2456 LazyMatchConfig {
2457 target_len: self.target_len,
2458 min_match_len: ROW_MIN_MATCH_LEN,
2459 lazy_depth: self.lazy_depth,
2460 history_abs_end: self.history_abs_end(),
2461 },
2462 |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2463 )
2464 }
2465
2466 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2467 repcode_candidate_shared(
2468 self.live_history(),
2469 self.history_abs_start,
2470 self.offset_hist,
2471 abs_pos,
2472 lit_len,
2473 ROW_MIN_MATCH_LEN,
2474 )
2475 }
2476
    /// Search this position's row for the best prior match.
    ///
    /// Entries are visited newest-first starting at the row head (inserts
    /// move the head backwards); the one-byte tag filters out most
    /// non-matching entries before any byte comparison. Returns early once
    /// `target_len` is reached.
    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let (row, tag) = self.hash_and_row(abs_pos)?;
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let max_walk = self.search_depth.min(row_entries);

        let mut best = None;
        for i in 0..max_walk {
            let slot = (head + i) & row_mask;
            let idx = row_base + slot;
            if self.row_tags[idx] != tag {
                continue;
            }
            let candidate_pos = self.row_positions[idx];
            // Skip vacant slots, evicted positions, and non-prior positions.
            if candidate_pos == ROW_EMPTY_SLOT
                || candidate_pos < self.history_abs_start
                || candidate_pos >= abs_pos
            {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= ROW_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
2518
2519 fn extend_backwards(
2520 &self,
2521 candidate_pos: usize,
2522 abs_pos: usize,
2523 match_len: usize,
2524 lit_len: usize,
2525 ) -> MatchCandidate {
2526 extend_backwards_shared(
2527 self.live_history(),
2528 self.history_abs_start,
2529 candidate_pos,
2530 abs_pos,
2531 match_len,
2532 lit_len,
2533 )
2534 }
2535
2536 fn insert_positions(&mut self, start: usize, end: usize) {
2537 for pos in start..end {
2538 self.insert_position(pos);
2539 }
2540 }
2541
2542 fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2543 if step <= 1 {
2544 self.insert_positions(start, end);
2545 return;
2546 }
2547 let mut pos = start;
2548 while pos < end {
2549 self.insert_position(pos);
2550 let next = pos.saturating_add(step);
2551 if next <= pos {
2552 break;
2553 }
2554 pos = next;
2555 }
2556 }
2557
    /// Record `abs_pos` in its row, overwriting the oldest entry.
    ///
    /// `row_heads[row]` points at the most recent slot; the head moves
    /// backwards (mod row size) on insert, so a forward walk from the head
    /// visits entries newest-first.
    fn insert_position(&mut self, abs_pos: usize) {
        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
            // Too close to the end of history to hash a full key.
            return;
        };
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let next = head.wrapping_sub(1) & row_mask;
        self.row_heads[row] = next as u8;
        self.row_tags[row_base + next] = tag;
        self.row_positions[row_base + next] = abs_pos;
    }
2571}
2572
/// Hash-chain matcher: a head table maps a 4-byte hash to the most recent
/// position, and a chain table links each position to the previous occurrence
/// of the same hash.
struct HcMatchGenerator {
    // Hard cap on the number of history bytes kept for matching.
    max_window_size: usize,
    // Blocks currently contributing to the window, oldest first.
    window: VecDeque<Vec<u8>>,
    // Total bytes across `window`.
    window_size: usize,
    // Concatenated history bytes; only `history[history_start..]` is live.
    history: Vec<u8>,
    // Dead (evicted) prefix length at the front of `history`.
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // Base subtracted from absolute positions before storing them as u32.
    position_base: usize,
    // Three most recent match offsets (repeat-offset history).
    offset_hist: [u32; 3],
    // hash -> stored (rel + 1) position of the newest occurrence.
    hash_table: Vec<u32>,
    // stored position -> stored position of the previous same-hash occurrence.
    chain_table: Vec<u32>,
    // How many positions ahead the lazy heuristic looks (1 or 2 used here).
    lazy_depth: u8,
    // log2 of the hash table size.
    hash_log: usize,
    // log2 of the chain table size.
    chain_log: usize,
    // Maximum candidates examined per lookup.
    search_depth: usize,
    // Match length at which a search stops early.
    target_len: usize,
}
2590
2591impl HcMatchGenerator {
2592 fn new(max_window_size: usize) -> Self {
2593 Self {
2594 max_window_size,
2595 window: VecDeque::new(),
2596 window_size: 0,
2597 history: Vec::new(),
2598 history_start: 0,
2599 history_abs_start: 0,
2600 position_base: 0,
2601 offset_hist: [1, 4, 8],
2602 hash_table: Vec::new(),
2603 chain_table: Vec::new(),
2604 lazy_depth: 2,
2605 hash_log: HC_HASH_LOG,
2606 chain_log: HC_CHAIN_LOG,
2607 search_depth: HC_SEARCH_DEPTH,
2608 target_len: HC_TARGET_LEN,
2609 }
2610 }
2611
    /// Apply level-dependent tuning.
    ///
    /// If either table's log size changed, the existing tables are cleared so
    /// `ensure_tables` reallocates them with the new geometry.
    fn configure(&mut self, config: HcConfig) {
        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
        self.hash_log = config.hash_log;
        self.chain_log = config.chain_log;
        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
        self.target_len = config.target_len;
        if resize && !self.hash_table.is_empty() {
            self.hash_table.clear();
            self.chain_table.clear();
        }
    }
2624
2625 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2626 self.window_size = 0;
2627 self.history.clear();
2628 self.history_start = 0;
2629 self.history_abs_start = 0;
2630 self.position_base = 0;
2631 self.offset_hist = [1, 4, 8];
2632 if !self.hash_table.is_empty() {
2633 self.hash_table.fill(HC_EMPTY);
2634 self.chain_table.fill(HC_EMPTY);
2635 }
2636 for mut data in self.window.drain(..) {
2637 data.resize(data.capacity(), 0);
2638 reuse_space(data);
2639 }
2640 }
2641
2642 fn get_last_space(&self) -> &[u8] {
2643 self.window.back().unwrap().as_slice()
2644 }
2645
    /// Append a block to the window, evicting the oldest blocks as needed so
    /// the total stays within `max_window_size`; evicted buffers are recycled
    /// through `reuse_space`. The new block itself must fit the window.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        // Reclaim the dead prefix before growing the history buffer.
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
2663
2664 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2665 while self.window_size > self.max_window_size {
2666 let removed = self.window.pop_front().unwrap();
2667 self.window_size -= removed.len();
2668 self.history_start += removed.len();
2669 self.history_abs_start += removed.len();
2670 reuse_space(removed);
2671 }
2672 }
2673
    /// Insert the up-to-3 positions immediately preceding the new block.
    ///
    /// The hash key is 4 bytes (see `hash_position`), so the last 3 positions
    /// of the previous block only became hashable once this block's bytes
    /// arrived.
    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
        let backfill_start = current_abs_start
            .saturating_sub(3)
            .max(self.history_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
    }
2684
    /// Index the newest block without emitting any sequences.
    ///
    /// Blocks hinted incompressible are sampled every
    /// `INCOMPRESSIBLE_SKIP_STEP` positions, except for a densely indexed tail
    /// so matches can still start just before the next block boundary.
    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        self.backfill_boundary_positions(current_abs_start);
        if incompressible_hint == Some(true) {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                INCOMPRESSIBLE_SKIP_STEP,
            );
            let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let tail_start = current_abs_end
                .saturating_sub(dense_tail)
                .max(self.history_abs_start);
            let tail_start = tail_start.max(current_abs_start);
            for pos in tail_start..current_abs_end {
                // Skip positions the strided pass above already inserted.
                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
                    self.insert_position(pos);
                }
            }
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }
    }
2711
    /// Run the matcher over the newest block, emitting `Sequence`s through
    /// `handle_sequence`.
    ///
    /// Greedy scan with lazy evaluation: for each position the best candidate
    /// is found, `pick_lazy_match` may defer it in favor of a later match,
    /// and accepted matches index all covered positions before advancing.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // The match may have been extended backwards into the pending
                // literals, so `candidate.start` can precede `abs_pos`.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repeat-offset history in sync with what was emitted.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index the tail that is too short to start a match but still has a
        // full 4-byte hash key available.
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2767
2768 fn ensure_tables(&mut self) {
2769 if self.hash_table.is_empty() {
2770 self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2771 self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2772 }
2773 }
2774
2775 fn compact_history(&mut self) {
2776 if self.history_start == 0 {
2777 return;
2778 }
2779 if self.history_start >= self.max_window_size
2780 || self.history_start * 2 >= self.history.len()
2781 {
2782 self.history.drain(..self.history_start);
2783 self.history_start = 0;
2784 }
2785 }
2786
    /// The still-addressable suffix of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
2790
    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
2794
2795 fn hash_position(&self, data: &[u8]) -> usize {
2796 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2797 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2798 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2799 }
2800
2801 fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2802 let rel = abs_pos.checked_sub(self.position_base)?;
2803 let rel_u32 = u32::try_from(rel).ok()?;
2804 (rel_u32 < u32::MAX).then_some(rel_u32)
2808 }
2809
    /// Keep stored positions representable as u32.
    ///
    /// Positions are stored relative to `position_base`. Once `abs_pos` would
    /// no longer fit (or nears `u32::MAX`, which is reserved), the base is
    /// moved to the current window start, both tables are wiped, and the live
    /// history below `abs_pos` is re-indexed.
    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
        let needs_rebase = self
            .relative_position(abs_pos)
            .is_none_or(|relative| relative >= u32::MAX - 1);
        if !needs_rebase {
            return;
        }

        self.position_base = self.history_abs_start;
        self.hash_table.fill(HC_EMPTY);
        self.chain_table.fill(HC_EMPTY);

        let history_start = self.history_abs_start;
        // Rebuilding is O(window), but a rebase can only trigger after the
        // u32 position space is nearly exhausted, so it is rare.
        for pos in history_start..abs_pos {
            self.insert_position_no_rebase(pos);
        }
    }
2830
    /// Insert `abs_pos`, first rebasing the u32 position space if needed.
    fn insert_position(&mut self, abs_pos: usize) {
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
2835
    /// Link `abs_pos` into its hash chain without overflow protection; callers
    /// must have run `maybe_rebase_positions` first.
    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        // A full 4-byte key is required to hash this position.
        if idx + 4 > concat.len() {
            return;
        }
        let hash = self.hash_position(&concat[idx..]);
        let Some(relative_pos) = self.relative_position(abs_pos) else {
            return;
        };
        // Stored values are offset by one, presumably so that `HC_EMPTY` can
        // mark vacant slots — the constant is defined elsewhere; confirm there.
        let stored = relative_pos + 1;
        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
        // The old head becomes this entry's chain successor.
        let prev = self.hash_table[hash];
        self.chain_table[chain_idx] = prev;
        self.hash_table[hash] = stored;
    }
2852
2853 fn insert_positions(&mut self, start: usize, end: usize) {
2854 for pos in start..end {
2855 self.insert_position(pos);
2856 }
2857 }
2858
2859 fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2860 if step == 0 {
2861 return;
2862 }
2863 let mut pos = start;
2864 while pos < end {
2865 self.insert_position(pos);
2866 let next = pos.saturating_add(step);
2867 if next <= pos {
2868 break;
2869 }
2870 pos = next;
2871 }
2872 }
2873
    /// Collect up to `search_depth` earlier positions sharing this position's
    /// 4-byte hash, newest first; unused slots stay `usize::MAX`.
    ///
    /// The walk is capped at `search_depth * 4` link steps so stale or
    /// out-of-window chain entries cannot make a lookup unboundedly expensive.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        let mut steps = 0;
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            // Undo the +1 offset applied when storing (see insert path).
            let candidate_rel = cur.wrapping_sub(1) as usize;
            let candidate_abs = self.position_base + candidate_rel;
            let next = self.chain_table[candidate_rel & chain_mask];
            steps += 1;
            // A self-link can appear after chain-table slot reuse; record the
            // candidate (if valid) and stop to avoid spinning forever.
            if next == cur {
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Drop candidates evicted from the window or not strictly prior.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
2922
2923 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2924 let rep = self.repcode_candidate(abs_pos, lit_len);
2925 let hash = self.hash_chain_candidate(abs_pos, lit_len);
2926 Self::better_candidate(rep, hash)
2927 }
2928
    /// Best match among the hash-chain candidates for `abs_pos`.
    ///
    /// Candidates are extended backwards into the pending literal run before
    /// scoring; the search stops early once `target_len` is reached.
    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best: Option<MatchCandidate> = None;
        for candidate_abs in self.chain_candidates(abs_pos) {
            // The candidate buffer is filled front-to-back; the first
            // sentinel marks the end.
            if candidate_abs == usize::MAX {
                break;
            }
            let candidate_idx = candidate_abs - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|b| b.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
2954
    /// Probe the repeat-offset candidates at `abs_pos`.
    ///
    /// When the pending literal run is empty, the first recent offset is
    /// skipped and `offset_hist[0] - 1` is tried instead, alongside the
    /// second and third offsets — mirroring the repeat-offset rules used by
    /// `encode_offset_with_history`.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // Reject offsets that reach before the stream start...
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            // ...or before the live history window.
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
2995
    /// Grow a match backwards while the bytes preceding both positions agree,
    /// consuming at most `lit_len` pending literal bytes.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        // Cannot extend past the start of the pending literal run.
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
3020
3021 fn better_candidate(
3022 lhs: Option<MatchCandidate>,
3023 rhs: Option<MatchCandidate>,
3024 ) -> Option<MatchCandidate> {
3025 match (lhs, rhs) {
3026 (None, other) | (other, None) => other,
3027 (Some(lhs), Some(rhs)) => {
3028 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
3029 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
3030 if rhs_gain > lhs_gain {
3031 Some(rhs)
3032 } else {
3033 Some(lhs)
3034 }
3035 }
3036 }
3037 }
3038
3039 fn match_gain(match_len: usize, offset: usize) -> i32 {
3040 debug_assert!(
3041 offset > 0,
3042 "zstd offsets are 1-indexed, offset=0 is invalid"
3043 );
3044 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
3045 (match_len as i32) * 4 - offset_bits
3046 }
3047
    /// Lazy evaluation: keep `best` unless a match starting one position later
    /// (or two, when `lazy_depth >= 2`) would score clearly better, in which
    /// case return `None` so the caller emits a literal and retries.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        // Already long enough, or no room left to look ahead.
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // The +4 handicap (one match-length point) biases toward emitting the
        // current match rather than deferring.
        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                return None;
            }
        }

        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                // A match two positions away must clear a higher bar.
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
3089}
3090
#[test]
fn matches() {
    // End-to-end check: feed blocks through `MatchGenerator`, replay the
    // emitted sequences like a decoder would, and verify the reconstruction
    // equals the original input.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder stand-in: literals are copied through, matches are copied
    // byte-by-byte from `offset` bytes back (which handles overlapping
    // matches correctly).
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // A run of zeros: self-overlapping match within a single block.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    assert!(!matcher.next_sequence(|_| {}));

    // Repeating pattern plus zeros matching the previous block.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Partially novel data with a cross-block match on the zero tail.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // A whole block matching earlier history.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // skip_matching: the block is indexed but emitted as raw literals by the
    // caller, so we mirror it into `reconstructed` by hand.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // The skipped block must still be matchable as history.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Intra-block repetition preceded and followed by fresh bytes.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
3210
// Verifies that the dfast backend round-trips two consecutive 128 KiB blocks
// built from the same repeating 10-byte pattern, i.e. that matches referencing
// previous-block history decode back to the original bytes.
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder stand-in: literals are copied through; a triple copies
    // `match_len` bytes starting `offset` bytes back in the decoded output.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            // Byte-by-byte copy so overlapping matches (offset < match_len) work.
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    // Only the suffix contributed by the second block needs checking; the
    // first block's decode was already asserted above.
    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
3245
// End-to-end driver check: `reset(Default)` must select and initialize the
// dfast backend, committed data must round-trip through the emitted
// sequences, and a later reset to Fastest must shrink the reported window.
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // First block is committed but deliberately not matched, so it only
    // becomes searchable history.
    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching_with_hint(None);

    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    // Seed the decode buffer with the skipped first block, then replay the
    // second block's sequences on top of it.
    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            // Byte-wise copy handles overlapping matches (offset < match_len).
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
3287
// CompressionLevel::Level(4) must route matching through the row-hash backend.
#[test]
fn driver_level4_selects_row_backend() {
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);
}
3294
// A 1 KiB source-size hint should let Level(2) allocate dfast hash tables
// smaller than the full `1 << DFAST_HASH_BITS` default.
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline run with no hint: tables come out at the default size.
    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // Hinted run: the window is clamped and the tables shrink with it.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
3324
// Row-backend counterpart of the dfast hint test: a 1 KiB source-size hint
// should shrink the row-head table below the `ROW_HASH_BITS - ROW_LOG` default.
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline run with no hint: full-size row table.
    driver.reset(CompressionLevel::Level(4));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // Hinted run: row count is derived from the clamped window log instead.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
3357
// Round-trips three blocks through the row backend: two 128 KiB blocks of a
// repeating pattern (exercising cross-block matches) plus a 256-byte block of
// all distinct bytes (exercising a literals-heavy block after history exists).
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Decoder stand-in: copy literals; resolve triples from decoded output.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            // Byte-by-byte so overlapping matches (offset < match_len) work.
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());

    // Third block: 256 distinct byte values, unlikely to match prior history.
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_prefix = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[third_prefix..], third_block.as_slice());
}
3401
// A block shorter than the row matcher's minimum match length must come out
// purely as literals; once repeated data arrives, triples must appear.
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut emitted_triple = false;
    let mut literal_bytes = Vec::new();
    matcher.start_matching(|seq| {
        if let Sequence::Literals { literals } = seq {
            literal_bytes.extend_from_slice(literals);
        } else {
            emitted_triple = true;
        }
    });

    assert!(
        !emitted_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(literal_bytes, b"abcde");

    // Second block repeats the first five bytes, so a triple is expected now.
    emitted_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        emitted_triple |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        emitted_triple,
        "row backend should emit triples on repeated data"
    );
}
3435
// When the lookahead position lies outside the available data, the lazy picker
// must simply keep the candidate it was handed.
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    let candidate = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let kept = matcher
        .pick_lazy_match(0, 0, Some(candidate))
        .expect("best candidate must survive");

    assert_eq!(
        (kept.start, kept.offset, kept.match_len),
        (candidate.start, candidate.offset, candidate.match_len)
    );
}
3455
// A match source may begin in the last few bytes of the previous block (the
// "XYZ" tail). The row matcher must backfill that tail into its tables so the
// next block can emit a triple with offset 3 that crosses the block boundary.
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // 64 filler bytes, then a 3-byte tail the next block refers back to.
    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Decoder stand-in: copy literals; resolve triples from decoded output.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let mut saw_cross_boundary = false;
    let prefix_len = reconstructed.len();
    matcher.start_matching(|seq| {
        // Offset 3 with no leading literals means the match source is the
        // previous block's "XYZ" tail.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 3
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
3510
// With an incompressible hint, skip-matching should seed the row tables only
// sparsely, leaving more slots empty than the dense (compressible) path.
#[test]
fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);

    // Count how many row slots ended up occupied after seeding.
    let occupied_slots = |m: &RowMatchGenerator| {
        m.row_positions
            .iter()
            .filter(|&&pos| pos != ROW_EMPTY_SLOT)
            .count()
    };

    let mut dense = RowMatchGenerator::new(1 << 22);
    dense.configure(ROW_CONFIG);
    dense.add_data(data.clone(), |_| {});
    dense.skip_matching_with_hint(Some(false));
    let dense_slots = occupied_slots(&dense);

    let mut sparse = RowMatchGenerator::new(1 << 22);
    sparse.configure(ROW_CONFIG);
    sparse.add_data(data, |_| {});
    sparse.skip_matching_with_hint(Some(true));
    let sparse_slots = occupied_slots(&sparse);

    assert!(
        sparse_slots < dense_slots,
        "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
    );
}
3540
// Without a source-size hint, Level(2) must keep the full-size dfast table.
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(2));

    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    assert_eq!(
        driver.dfast_matcher().short_hash.len(),
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
3559
// Seeds the driver's suffix-store pool with a store that is too small for the
// next committed block; the commit must use an adequately sized store instead
// of reusing the undersized pooled one.
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Pool a store with capacity for only 1024 entries.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    // Commit a 4096-byte block, which needs a larger store.
    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    let last_suffix_slots = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit")
        .suffixes
        .slots
        .len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3585
// A tiny source-size hint must clamp both the reported window and the driver's
// per-block slice size down to the hinted-window floor.
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let window_bytes = driver.window_size() as usize;
    assert_eq!(window_bytes, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(driver.slice_size, window_bytes);

    let slice = driver.get_next_space();
    assert_eq!(slice.len(), window_bytes);
    driver.commit_space(slice);
}
3600
// Shrinking the slice size via a hint must not shrink pooled buffers: their
// capacity is kept so later growth does not reallocate.
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    let big = driver.get_next_space();
    let big_capacity = big.capacity();
    assert!(big_capacity >= 128 * 1024);
    driver.commit_space(big);

    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let shrunk = driver.get_next_space();
    assert_eq!(shrunk.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        shrunk.capacity() >= big_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3621
// Dropping from Best down to Fastest must release the oversized HC hash/chain
// tables rather than keeping their memory resident.
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Commit one small block so the HC tables actually get allocated.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3652
// Upgrading Better -> Best must reallocate the HC hash/chain tables at the
// larger Best-level sizes rather than reusing the Better-sized ones.
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    // Commit a block so the Better-sized tables get allocated.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Commit again so the Best-sized tables get (re)allocated.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3696
// The Fastest (simple) backend must still find a match against
// dictionary-primed history in the first committed block: offset 8 reaches
// back across the block boundary into the dictionary bytes.
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // First real block repeats the dictionary content exactly.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // Look for a triple with no leading literals whose source is the
        // dictionary copy (offset 8).
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
3729
// A 24-byte dictionary keeps its earliest bytes addressable: the first block
// can match them with offset 24 even though the live window is tiny.
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3762
// Priming with an empty dictionary still installs the supplied repcode history.
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.prime_with_dictionary(&[], [11, 7, 3]);
    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
3772
// The dfast backend must be able to match against dictionary-primed history
// in the very first committed block (offset reaching into the dictionary).
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= DFAST_MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3805
// Dictionary priming adds internal retention budget only; the frame-level
// window size reported to the caller must stay untouched.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3820
// Priming with 9 bytes leaves a 1-byte tail; no committed window entry may be
// shorter than the minimum match length.
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let shortest_entry = driver
        .match_generator
        .window
        .iter()
        .map(|entry| entry.data.len())
        .min()
        .unwrap_or(usize::MAX);
    assert!(
        shortest_entry >= MIN_MATCH_LEN,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
3839
// Of the 9 dictionary bytes, only 8 are expected to count toward the retained
// window budget — the short tail must not inflate it.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3855
// For the dfast backend all 12 primed bytes count toward the retained budget,
// including its 4-byte tail.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let budget_before = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3872
// The row backend must be able to match dictionary-primed history in the very
// first committed block.
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= ROW_MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
3905
// Of 9 primed bytes the row backend should budget only the 8 committed ones;
// the 1-byte tail stays uncommitted.
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let budget_before = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.row_matcher().max_window_size,
        budget_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3922
// Pins the row matcher's live window to 8 bytes so that committing two 8-byte
// blocks evicts the 24 primed dictionary bytes; the dictionary retention
// budget must then drop to zero and the window budget back to its base value.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Force a tiny window so eviction happens after just two blocks.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Priming temporarily extends the retained-window budget by the 24
    // committed dictionary bytes.
    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);

    // Two 8-byte blocks push the dictionary slices out of the live window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3953
// `get_last_space` must expose the row backend's most recent commit, and a
// reset down to Fastest must empty the row window.
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"row-data");
    driver.commit_space(block);
    assert_eq!(driver.get_last_space(), b"row-data");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
3970
// Switching from the row backend back to Simple should keep the row matcher
// allocated but drop its tables and recycle its history buffers into the
// driver's vec pool.
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Touch the row matcher and give it some history to recycle.
    let _ = driver.row_matcher();
    let mut space = driver.get_next_space();
    space.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(space);

    let before_pool = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    // Tables are released on switch-away; buffers flow into the vec pool.
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    assert!(
        driver.vec_pool.len() >= before_pool,
        "row reset should recycle row history buffers"
    );
}
4001
// Forcing the Row backend while its matcher is absent must not panic on reset.
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.active_backend = MatcherBackend::Row;
    driver.row_match_generator = None;
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
}
4013
// A zero-byte source hint must clamp the window log to the hinted floor.
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    let mut params = resolve_level_params(CompressionLevel::Level(4), None);
    params.window_log = 22;
    assert_eq!(
        adjust_params_for_source_size(params, 0).window_log,
        MIN_HINTED_WINDOW_LOG
    );
}
4021
// Cross-checks the (potentially SIMD-accelerated) `common_prefix_len` against
// a trivial scalar implementation over many lengths, start offsets, and
// mismatch positions, covering the 16/32-byte vector-width boundaries and the
// scalar tail path.
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    // Ground truth: count leading positions where both slices agree.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        a.iter()
            .zip(b.iter())
            .take_while(|(lhs, rhs)| lhs == rhs)
            .count()
    }

    for total_len in [
        0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ] {
        let base: Vec<u8> = (0..total_len)
            .map(|i| ((i * 13 + 7) & 0xFF) as u8)
            .collect();

        // Slide the start offset to exercise unaligned inputs.
        for start in [0usize, 1, 3] {
            if start > total_len {
                continue;
            }
            let a = &base[start..];
            let b = a.to_vec();
            assert_eq!(
                MatchGenerator::common_prefix_len(a, &b),
                scalar_reference(a, &b),
                "equal slices total_len={total_len} start={start}"
            );

            let len = a.len();
            // Flip one byte at positions around the vector-width boundaries.
            for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
                if mismatch >= len {
                    continue;
                }
                let mut altered = b.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Also flip the very last byte (pure tail-path mismatch).
            if len > 0 {
                let mismatch = len - 1;
                let mut altered = b.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Different-length inputs: result is capped by the shorter slice.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    assert_eq!(
        MatchGenerator::common_prefix_len(&long, &shorter),
        scalar_reference(&long, &shorter)
    );
}
4084
// In an all-'a' buffer the next position always offers a better match, so the
// lazy picker must defer (return None) instead of keeping the current best.
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 16;
    let current_best = MatchCandidate {
        start: pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let deferred = matcher.pick_lazy_match(pos, 0, Some(current_best));
    assert!(
        deferred.is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
4103
// With lazy_depth = 2 the picker also probes position +2: when that candidate
// is significantly longer than the current best, the best is deferred (None)
// so the longer match can be taken later.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // NOTE(review): search_depth = 0 appears to limit the search to repcode
    // candidates only — confirm against best_match.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Byte layout crafted so the repcode match at +2 is much longer than +0.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // Sanity: +1 must not already beat the baseline, otherwise depth 1 alone
    // would explain the deferral.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
4140
// Depth-2 lazy matching: a +2 candidate that is only one byte longer does not
// count as "significantly better", so the picker keeps the current best.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // NOTE(review): search_depth = 0 appears to limit the search to repcode
    // candidates only — confirm against best_match.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Byte layout crafted so +2's repcode match is exactly one byte longer.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
4172
// Recomputes the row hash by hand and verifies `hash_and_row` derives the row
// index and tag from the HIGH bits of the multiplicative hash: the row index
// from the bits above the tag, the tag from the low ROW_TAG_BITS of the
// extracted field.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    // Hash the key starting 8 bytes into the block.
    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Reference computation mirroring the production hash: multiply the
    // little-endian key by the prime, then keep the top
    // (row_hash_log + ROW_TAG_BITS) bits.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
    let hash = value.wrapping_mul(PRIME);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    let combined = hash >> (u64::BITS as usize - total_bits);
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
4206
// A repcode whose source would start before the retained history (abs pos 12,
// offset 3, history begins at abs 10) must be rejected.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.offset_hist = [3, 0, 0];
    matcher.history = alloc::vec![b'a'; 20];
    matcher.history_abs_start = 10;
    matcher.history_start = 0;
    assert!(matcher.repcode_candidate(12, 1).is_none());
}
4218
/// With only 5 bytes of history, a repcode probe at position 4 leaves too
/// few trailing bytes to form a match, so no candidate may be produced.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.offset_hist = [1, 0, 0];

    assert!(matcher.repcode_candidate(4, 1).is_none());
}
4230
/// A 5-byte history is shorter than `ROW_MIN_MATCH_LEN` (6), so no row
/// candidate can be formed even at position 0.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;

    assert!(matcher.row_candidate(0, 0).is_none());
}
4241
/// A 3-byte history is too short to anchor any hash-chain entry, so every
/// candidate slot returned for position 0 must hold the `usize::MAX`
/// sentinel.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut matcher = HcMatchGenerator::new(32);
    matcher.history = b"abc".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.ensure_tables();

    let candidates = matcher.chain_candidates(0);
    // Equivalent to `all(== usize::MAX)`: no slot may hold a real position.
    assert!(!candidates.iter().any(|&pos| pos != usize::MAX));
}
4253
/// `reset` must refill already-allocated hash/chain tables with the
/// `HC_EMPTY` sentinel rather than leaving stale entries behind.
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());
    // Poison both tables so a reset that skips refilling would be caught.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    assert!(hc.hash_table.iter().all(|&v| v == HC_EMPTY));
    assert!(hc.chain_table.iter().all(|&v| v == HC_EMPTY));
}
4269
/// An empty current block gives `start_matching` nothing to scan, so the
/// sequence callback must never fire.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut matcher = HcMatchGenerator::new(32);
    matcher.add_data(Vec::new(), |_| {});
    let mut emitted_any = false;
    matcher.start_matching(|_| emitted_any = true);
    assert!(!emitted_any, "empty current block should not emit sequences");
}
4278
4279#[cfg(test)]
4280fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
4281 let mut out = Vec::with_capacity(len);
4282 let mut state = seed;
4283 for _ in 0..len {
4284 state ^= state << 13;
4285 state ^= state >> 7;
4286 state ^= state << 17;
4287 out.push((state >> 40) as u8);
4288 }
4289 out
4290}
4291
/// After a sparse (incompressible-hint) skip of a high-entropy first block,
/// a second block repeating the first block's 9-byte tail must still yield
/// an immediate zero-literal cross-block match against that tail.
#[test]
fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Record only the very first emitted sequence.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse skip");
    assert_eq!(
        literals_len, 0,
        "first sequence should start at block boundary"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "tail-aligned cross-block match must be preserved"
    );
}
4337
/// Sparse skip inserts positions on the `INCOMPRESSIBLE_SKIP_STEP` grid;
/// the dense tail seeding must not insert those grid positions a second
/// time, which would create a self-loop (`chain[rel] == rel + 1`) in the
/// hash chain.
#[test]
fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(true));

    let current_len = first.len();
    let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
    let current_abs_end = current_abs_start + current_len;
    // Length of the densely seeded tail region.
    let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
    let tail_start = current_abs_end
        .saturating_sub(dense_tail)
        .max(matcher.history_abs_start)
        .max(current_abs_start);

    // Find a dense-tail position that also lies on the sparse-insert grid.
    let overlap_pos = (tail_start..current_abs_end)
        .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
        .expect("fixture should contain at least one sparse-grid overlap in dense tail");

    let rel = matcher
        .relative_position(overlap_pos)
        .expect("overlap position should be representable as relative position");
    let chain_idx = rel as usize & ((1 << matcher.chain_log) - 1);
    assert_ne!(
        matcher.chain_table[chain_idx],
        rel + 1,
        "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
    );
}
4368
/// Once `history_start` is large relative to the window, `compact_history`
/// must drop the consumed prefix and rebase `history_start` to zero.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut hc = HcMatchGenerator::new(8);
    hc.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    hc.history_start = 16;
    hc.compact_history();
    assert_eq!(hc.history_start, 0);
    // 'q' is index 16 of the original history: exactly the retained suffix.
    assert_eq!(hc.history, b"qrstuvwxyz");
}
4378
/// When `position_base` is ahead of the requested absolute position, no
/// relative position exists; `insert_position_no_rebase` must then leave
/// both tables untouched.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abcdefghijklmnop".to_vec();
    hc.history_abs_start = 0;
    // position_base (1) > abs pos (0): the insert must be a no-op.
    hc.position_base = 1;
    hc.ensure_tables();
    let before_hash = hc.hash_table.clone();
    let before_chain = hc.chain_table.clone();

    hc.insert_position_no_rebase(0);

    assert_eq!(hc.hash_table, before_hash);
    assert_eq!(hc.chain_table, before_chain);
}
4394
/// Priming with a 24-byte dictionary temporarily grows the simple matcher's
/// window by the dictionary length; once later blocks evict the primed
/// slices, the retained budget must drop to zero and the window must shrink
/// back to its base size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    // Two 8-byte blocks against an 8-byte window evict the primed slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4425
/// Same budget contract as the simple-matcher variant, exercised through
/// the dfast backend (Level 2): priming grows the window by the 24-byte
/// dictionary, and eviction retires the budget and shrinks it back.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    // Two 8-byte blocks against an 8-byte window evict the primed slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4457
/// Dictionary-primed history must be visible to the hash-chain backend:
/// the first full block repeating the 8-byte dictionary should produce a
/// zero-literal match at offset 8.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // Accept only a zero-literal match that points back at the dictionary.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= HC_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
4492
/// Same budget contract as the simple-matcher variant, exercised through
/// the hash-chain backend (`Better`): priming grows the window by the
/// 24-byte dictionary, and eviction retires the budget and shrinks it back.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);

    // Two 8-byte blocks against an 8-byte window evict the primed slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4523
/// Simulates live history that starts beyond `u32::MAX` and checks that
/// `skip_matching` rebases: `position_base` must re-anchor to the oldest
/// live absolute position while the hash table stays populated and chain
/// candidates remain reachable.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On targets where usize cannot hold u32::MAX + 64 the scenario is
    // unrepresentable; skip the test there.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching(None);
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
4556
/// `maybe_rebase_positions` must rebuild the tables only from positions
/// already inserted before `abs_pos`; the expectation is produced by
/// replaying exactly those insertions on an otherwise identical matcher.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // Skip on targets where the post-u32 absolute position is unrepresentable.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Build the expected tables: empty, then insert only positions < abs_pos.
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
4596
/// A store with a single slot must still key, insert, and look up a suffix
/// without panicking.
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);
    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
4604
/// `reset` must configure the driver per level: dense hash fill (step 1)
/// for `Uncompressed`, interleaved fill (`FAST_HASH_FILL_STEP`) for
/// `Fastest`, and the hash-chain backend with lazy depth 2 and a
/// `1 << 23` window for `Better`.
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
4622
/// Emitting a match must rotate the new offset to the front of the repcode
/// history: `offset_hist` ends as `[5, 1, 4]`.
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                // Overlapping match: match_len (10) exceeds offset (5).
                offset: 5,
                match_len: 10,
            }
        );
    }));
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
4644
/// With zero pending literals, the repcode probe must find the 10-byte
/// repeat via rep1 (`offset_hist[1]`), the only slot holding the correct
/// offset here.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // Only rep1 (10) points back at the repeated prefix.
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4661
/// A repcode target may live in a previous window entry: after skipping the
/// first 10-byte block, a repeat of it must still match at offset 10.
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // rep1 (10) reaches back into the skipped first window entry.
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
4682
/// With zero pending literals the repcode probe must also consult rep2
/// (`offset_hist[2]`), the only slot holding the correct offset here.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // Only rep2 (10) points back at the repeated prefix.
    matcher.offset_hist = [99, 4, 10];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4699
/// With zero pending literals the probe also tries rep0 - 1: here
/// `offset_hist[0] == 11`, so the effective offset 10 lands on the repeat.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep0 is 11; only rep0 - 1 == 10 matches the repeated prefix.
    matcher.offset_hist = [11, 4, 99];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4716
/// An offset that reaches beyond the searchable prefix must yield no match:
/// here 10 previous-block bytes plus `suffix_idx` 3 give 13 searchable
/// bytes, and the probe asks for offset 14.
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
    assert_eq!(candidate, None);
}
4736
/// `skip_matching` must seed every position of the skipped block even when
/// the matcher uses the interleaved step-3 hash fill: the probe "bcdef"
/// starts at position 1 (off the step-3 grid) yet still matches at
/// offset 15.
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    }));
    // The whole probe was consumed by the match: no further sequences.
    assert!(!matcher.next_sequence(|_| {}));
}
4761
/// With the incompressible hint, skipped blocks are seeded sparsely except
/// for a dense tail: a probe copied from an off-grid prefix position must
/// not match immediately, while a probe copied from the tail must.
#[test]
fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let mut matcher = MatchGenerator::new(128);
    let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
    let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
    let tail_start = first.len() - MIN_MATCH_LEN;
    let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
    matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});

    matcher.skip_matching_with_hint(Some(true));

    // Phase 1: probe copied from position 3, off the sparse seeding grid.
    matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
    let mut sparse_first_is_literals = None;
    assert!(matcher.next_sequence(|seq| {
        if sparse_first_is_literals.is_none() {
            sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
        }
    }));
    assert!(
        sparse_first_is_literals.unwrap_or(false),
        "sparse-start probe should not produce an immediate match"
    );

    // Phase 2: fresh matcher, probe copied from the densely seeded tail.
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
        SuffixStore::with_capacity(256),
        |_, _| {},
    );
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
    let mut tail_first_is_immediate_match = None;
    assert!(matcher.next_sequence(|seq| {
        if tail_first_is_immediate_match.is_none() {
            tail_first_is_immediate_match =
                Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
        }
    }));
    assert!(
        tail_first_is_immediate_match.unwrap_or(false),
        "dense tail probe should match immediately at block start"
    );
}
4807
/// `add_suffixes_till` with the fast fill step must still backfill the last
/// searchable anchor (index len - MIN_MATCH_LEN = 5), which lies off the
/// step-3 grid.
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    let tail = &last.data[5..10];
    assert_eq!(last.suffixes.get(tail), Some(5));
}
4823
/// Stopping before `MIN_MATCH_LEN` leaves no complete key, so nothing may
/// be registered in the suffix store.
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    let first_key = &last.data[..MIN_MATCH_LEN];
    assert_eq!(last.suffixes.get(first_key), None);
}
4840
/// With the step-3 fill, `add_suffixes_till(17, ...)` must register every
/// grid position whose full key fits (0, 3, 6, 9, 12 — i.e. up to
/// 17 - MIN_MATCH_LEN).
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    for pos in [0usize, 3, 6, 9, 12] {
        let key = &last.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            last.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
4863
/// Even after two skipped blocks force window eviction, matching a repeat
/// of the most recent block must emit sequences that reconstruct it
/// exactly.
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Decode emitted sequences on top of the retained history.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let src = decoded.len() - offset;
            // Byte-wise copy so overlapping matches (offset < match_len) work.
            let mut copied = 0;
            while copied < match_len {
                let byte = decoded[src + copied];
                decoded.push(byte);
                copied += 1;
            }
        }
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
4893
/// `add_data`'s eviction callback must receive the evicted bytes themselves
/// (len 8), not a slice sized by the Vec's backing capacity (64).
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    // Both blocks deliberately over-allocate (capacity 64, len 8).
    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    matcher.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    matcher.add_data(block_b, |evicted| evicted_len = Some(evicted.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
4916
/// `trim_to_window`'s callback must receive the evicted bytes themselves
/// (len 8), not a slice sized by the Vec's backing capacity (64).
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Both blocks deliberately over-allocate (capacity 64, len 8).
    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    matcher.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");
    matcher.add_data(block_b, |_| {});

    // Shrink the window so the first block must be trimmed away.
    matcher.max_window_size = 8;

    let mut evicted_len = None;
    matcher.trim_to_window(|evicted| evicted_len = Some(evicted.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
4942
/// Finishing a block must insert its tail positions so the next block can
/// match across the boundary: "bcdea" at the end of block one anchors an
/// 11-byte zero-literal match at offset 5.
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Decode the (overlapping) match byte-by-byte.
                let start = history.len() - offset;
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}
4987
/// A dense skip (`skip_matching(Some(false))`) must seed the skipped
/// block's tail so that a following block repeating it matches immediately
/// at the block boundary.
#[test]
fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = b"0123456789abcdef".to_vec();
    first.extend_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(false));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Record only the very first emitted sequence.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected dense skip to preserve cross-boundary tail match"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5031
/// A sparse (incompressible-hint) skip of a high-entropy block must still
/// seed its tail densely: a second block repeating the 9-byte tail matches
/// immediately at offset `tail.len()`.
#[test]
fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});

    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Record only the very first emitted sequence.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected match against densely seeded tail"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5077
/// Appending a single byte makes position `first_len - 7` 8-byte hashable
/// for the first time; the second dense skip must backfill the matching
/// long-hash entry for that boundary start.
#[test]
fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
    let first_len = first.len();
    matcher.add_data(first, |_| {});
    matcher.skip_matching_dense();

    // One extra byte turns first_len - 7 into a full 8-byte hash window.
    matcher.add_data(alloc::vec![0xAB], |_| {});
    matcher.skip_matching_dense();

    let target_abs_pos = first_len - 7;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 8 <= live.len(),
        "fixture must make the boundary start long-hashable"
    );
    let long_hash = matcher.hash8(&live[target_rel..]);
    assert!(
        matcher.long_hash[long_hash].contains(&target_abs_pos),
        "dense skip must seed long-hash entry for newly hashable boundary start"
    );
}
5104
/// `seed_remaining_hashable_starts` must seed short-hash (4-byte) entries
/// up to and including the last valid start (`block_end - 4`) when seeding
/// begins `DFAST_MIN_MATCH_LEN` before the block end.
#[test]
fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
    matcher.add_data(block, |_| {});
    matcher.ensure_hash_tables();

    let current_len = matcher.window.back().unwrap().len();
    let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
    let seed_start = current_len - DFAST_MIN_MATCH_LEN;
    matcher.seed_remaining_hashable_starts(current_abs_start, current_len, seed_start);

    // The last position with a full 4-byte key is block_end - 4.
    let target_abs_pos = current_abs_start + current_len - 4;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 4 <= live.len(),
        "fixture must leave the last short-hash start valid"
    );
    let short_hash = matcher.hash4(&live[target_rel..]);
    assert!(
        matcher.short_hash[short_hash].contains(&target_abs_pos),
        "tail seeding must include the last 4-byte-hashable start"
    );
}
5130
/// Tail seeding must still cover the last 4-byte-hashable start
/// (`block_end - 4`) when the requested seed position already equals the
/// block end.
#[test]
fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
    matcher.add_data(block, |_| {});
    matcher.ensure_hash_tables();

    let current_len = matcher.window.back().unwrap().len();
    let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
    // Seed position == block end: nothing ahead of it remains to hash.
    matcher.seed_remaining_hashable_starts(current_abs_start, current_len, current_len);

    let target_abs_pos = current_abs_start + current_len - 4;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 4 <= live.len(),
        "fixture must leave the last short-hash start valid"
    );
    let short_hash = matcher.hash4(&live[target_rel..]);
    assert!(
        matcher.short_hash[short_hash].contains(&target_abs_pos),
        "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
    );
}
5155
/// Across two consecutive sparse skips, the bytes straddling the
/// first→second block boundary (3-byte prefix + 8-byte suffix) must be
/// backfilled so a third block repeating them matches immediately at
/// offset `second.len() + boundary_prefix.len()`.
#[test]
fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let boundary_prefix = [0xFA, 0xFB, 0xFC];
    let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];

    // First block ends with the boundary prefix.
    let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    let first_tail_start = first.len() - boundary_prefix.len();
    first[first_tail_start..].copy_from_slice(&boundary_prefix);
    matcher.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    // Second block begins with the boundary suffix.
    let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
    matcher.add_data(second.clone(), |_| {});
    matcher.skip_matching(Some(true));

    // Third block repeats prefix + suffix exactly.
    let mut third = boundary_prefix.to_vec();
    third.extend_from_slice(&boundary_suffix);
    third.extend_from_slice(b"-trailing-literals");
    matcher.add_data(third, |_| {});

    // Record only the very first emitted sequence.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate match from the prior sparse-skip boundary"
    );
    assert_eq!(
        offset,
        second.len() + boundary_prefix.len(),
        "expected match against backfilled first→second boundary start"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5208
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    /// Deterministic pseudo-random bytes from a 64-bit LCG (Knuth constants),
    /// taking the high-ish bits of each state as the output byte.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    // Fixture parameters mirror "iteration 23" of the original sweep.
    let iteration = 23u64;
    let base_len = (iteration * 89 % 16384) as usize;
    let mut data = generate_data(iteration, base_len);
    // Append a repeated window twice so the matcher has something to find.
    let repeated_window = data[128..256].to_vec();
    for _ in 0..2 {
        data.extend_from_slice(&repeated_window);
    }

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);
    let mut space = driver.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    driver.commit_space(space);

    // Replay every emitted sequence and rebuild the source, LZ77-style.
    let mut rebuilt = Vec::with_capacity(data.len());
    let mut saw_triple = false;
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            saw_triple = true;
            rebuilt.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            // Byte-at-a-time copy: handles overlapping matches (offset < match_len).
            let copy_from = rebuilt.len() - offset;
            for step in 0..match_len {
                let byte = rebuilt[copy_from + step];
                rebuilt.push(byte);
            }
        }
    });

    assert!(saw_triple, "fixture must emit at least one match");
    assert_eq!(rebuilt, data);
}