1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
18const MIN_MATCH_LEN: usize = 5;
19const FAST_HASH_FILL_STEP: usize = 3;
20const DFAST_MIN_MATCH_LEN: usize = 6;
21const DFAST_TARGET_LEN: usize = 48;
22const DFAST_HASH_BITS: usize = 20;
25const DFAST_SEARCH_DEPTH: usize = 4;
26const DFAST_EMPTY_SLOT: usize = usize::MAX;
27
28const HC_HASH_LOG: usize = 20;
29const HC_CHAIN_LOG: usize = 19;
30const HC_SEARCH_DEPTH: usize = 16;
31const HC_MIN_MATCH_LEN: usize = 5;
32const HC_TARGET_LEN: usize = 48;
33const HC_EMPTY: u32 = 0;
36
37const MAX_HC_SEARCH_DEPTH: usize = 32;
40
41#[derive(Copy, Clone)]
44struct HcConfig {
45 hash_log: usize,
46 chain_log: usize,
47 search_depth: usize,
48 target_len: usize,
49}
50
51const HC_CONFIG: HcConfig = HcConfig {
52 hash_log: HC_HASH_LOG,
53 chain_log: HC_CHAIN_LOG,
54 search_depth: HC_SEARCH_DEPTH,
55 target_len: HC_TARGET_LEN,
56};
57
58const BEST_HC_CONFIG: HcConfig = HcConfig {
60 hash_log: 21,
61 chain_log: 20,
62 search_depth: 32,
63 target_len: 128,
64};
65
66#[derive(Copy, Clone)]
68struct LevelParams {
69 backend: MatcherBackend,
70 window_log: u8,
71 hash_fill_step: usize,
72 lazy_depth: u8,
73 hc: HcConfig,
74}
75
76fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
77 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
78 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
79}
80
81#[rustfmt::skip]
90const LEVEL_TABLE: [LevelParams; 22] = [
91 LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG },
94 LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
95 LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
96 LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
97 LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 } },
98 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 } },
99 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 } },
100 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 } },
101 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 } },
102 LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 } },
103 LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG },
104 LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 } },
105 LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 } },
106 LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 } },
107 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 } },
108 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 } },
109 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
110 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
111 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
112 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
113 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
114 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
115];
116
117const MIN_WINDOW_LOG: u8 = 10;
119
120fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
129 let src_log = if src_size == 0 {
133 MIN_WINDOW_LOG
134 } else {
135 (64 - (src_size - 1).leading_zeros()) as u8 };
137 let src_log = src_log.max(MIN_WINDOW_LOG);
138 if src_log < params.window_log {
139 params.window_log = src_log;
140 }
141 if params.backend == MatcherBackend::HashChain {
145 if (src_log + 2) < params.hc.hash_log as u8 {
146 params.hc.hash_log = (src_log + 2) as usize;
147 }
148 if (src_log + 1) < params.hc.chain_log as u8 {
149 params.hc.chain_log = (src_log + 1) as usize;
150 }
151 }
152 params
153}
154
155fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
158 let params = match level {
159 CompressionLevel::Uncompressed => LevelParams {
160 backend: MatcherBackend::Simple,
161 window_log: 17,
162 hash_fill_step: 1,
163 lazy_depth: 0,
164 hc: HC_CONFIG,
165 },
166 CompressionLevel::Fastest => LEVEL_TABLE[0],
167 CompressionLevel::Default => LEVEL_TABLE[2],
168 CompressionLevel::Better => LEVEL_TABLE[6],
169 CompressionLevel::Best => LEVEL_TABLE[10],
170 CompressionLevel::Level(n) => {
171 if n > 0 {
172 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
173 LEVEL_TABLE[idx]
174 } else if n == 0 {
175 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
177 } else {
178 let acceleration =
182 (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
183 let step = (acceleration + 3).min(128);
184 LevelParams {
185 backend: MatcherBackend::Simple,
186 window_log: 17,
187 hash_fill_step: step,
188 lazy_depth: 0,
189 hc: HC_CONFIG,
190 }
191 }
192 }
193 };
194 if let Some(size) = source_size {
195 adjust_params_for_source_size(params, size)
196 } else {
197 params
198 }
199}
200
201#[derive(Copy, Clone, Debug, PartialEq, Eq)]
202enum MatcherBackend {
203 Simple,
204 Dfast,
205 HashChain,
206}
207
208pub struct MatchGeneratorDriver {
210 vec_pool: Vec<Vec<u8>>,
211 suffix_pool: Vec<SuffixStore>,
212 match_generator: MatchGenerator,
213 dfast_match_generator: Option<DfastMatchGenerator>,
214 hc_match_generator: Option<HcMatchGenerator>,
215 active_backend: MatcherBackend,
216 slice_size: usize,
217 base_slice_size: usize,
218 reported_window_size: usize,
221 dictionary_retained_budget: usize,
224 source_size_hint: Option<u64>,
226}
227
228impl MatchGeneratorDriver {
229 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
234 let max_window_size = max_slices_in_window * slice_size;
235 Self {
236 vec_pool: Vec::new(),
237 suffix_pool: Vec::new(),
238 match_generator: MatchGenerator::new(max_window_size),
239 dfast_match_generator: None,
240 hc_match_generator: None,
241 active_backend: MatcherBackend::Simple,
242 slice_size,
243 base_slice_size: slice_size,
244 reported_window_size: max_window_size,
245 dictionary_retained_budget: 0,
246 source_size_hint: None,
247 }
248 }
249
250 fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
251 resolve_level_params(level, source_size)
252 }
253
254 fn dfast_matcher(&self) -> &DfastMatchGenerator {
255 self.dfast_match_generator
256 .as_ref()
257 .expect("dfast backend must be initialized by reset() before use")
258 }
259
260 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
261 self.dfast_match_generator
262 .as_mut()
263 .expect("dfast backend must be initialized by reset() before use")
264 }
265
266 fn hc_matcher(&self) -> &HcMatchGenerator {
267 self.hc_match_generator
268 .as_ref()
269 .expect("hash chain backend must be initialized by reset() before use")
270 }
271
272 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
273 self.hc_match_generator
274 .as_mut()
275 .expect("hash chain backend must be initialized by reset() before use")
276 }
277
278 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
279 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
280 if reclaimed == 0 {
281 return;
282 }
283 self.dictionary_retained_budget -= reclaimed;
284 match self.active_backend {
285 MatcherBackend::Simple => {
286 self.match_generator.max_window_size = self
287 .match_generator
288 .max_window_size
289 .saturating_sub(reclaimed);
290 }
291 MatcherBackend::Dfast => {
292 let matcher = self.dfast_matcher_mut();
293 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
294 }
295 MatcherBackend::HashChain => {
296 let matcher = self.hc_matcher_mut();
297 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
298 }
299 }
300 }
301
302 fn trim_after_budget_retire(&mut self) {
303 loop {
304 let mut evicted_bytes = 0usize;
305 match self.active_backend {
306 MatcherBackend::Simple => {
307 let vec_pool = &mut self.vec_pool;
308 let suffix_pool = &mut self.suffix_pool;
309 self.match_generator.reserve(0, |mut data, mut suffixes| {
310 evicted_bytes += data.len();
311 data.resize(data.capacity(), 0);
312 vec_pool.push(data);
313 suffixes.slots.clear();
314 suffixes.slots.resize(suffixes.slots.capacity(), None);
315 suffix_pool.push(suffixes);
316 });
317 }
318 MatcherBackend::Dfast => {
319 let mut retired = Vec::new();
320 self.dfast_matcher_mut().trim_to_window(|data| {
321 evicted_bytes += data.len();
322 retired.push(data);
323 });
324 for mut data in retired {
325 data.resize(data.capacity(), 0);
326 self.vec_pool.push(data);
327 }
328 }
329 MatcherBackend::HashChain => {
330 let mut retired = Vec::new();
331 self.hc_matcher_mut().trim_to_window(|data| {
332 evicted_bytes += data.len();
333 retired.push(data);
334 });
335 for mut data in retired {
336 data.resize(data.capacity(), 0);
337 self.vec_pool.push(data);
338 }
339 }
340 }
341 if evicted_bytes == 0 {
342 break;
343 }
344 self.retire_dictionary_budget(evicted_bytes);
345 }
346 }
347}
348
349impl Matcher for MatchGeneratorDriver {
350 fn supports_dictionary_priming(&self) -> bool {
351 true
352 }
353
354 fn set_source_size_hint(&mut self, size: u64) {
355 self.source_size_hint = Some(size);
356 }
357
358 fn reset(&mut self, level: CompressionLevel) {
359 let hint = self.source_size_hint.take();
360 let hinted = hint.is_some();
361 let params = Self::level_params(level, hint);
362 let max_window_size = 1usize << params.window_log;
363 self.dictionary_retained_budget = 0;
364 if self.active_backend != params.backend {
365 match self.active_backend {
366 MatcherBackend::Simple => {
367 let vec_pool = &mut self.vec_pool;
368 let suffix_pool = &mut self.suffix_pool;
369 self.match_generator.reset(|mut data, mut suffixes| {
370 data.resize(data.capacity(), 0);
371 vec_pool.push(data);
372 suffixes.slots.clear();
373 suffixes.slots.resize(suffixes.slots.capacity(), None);
374 suffix_pool.push(suffixes);
375 });
376 }
377 MatcherBackend::Dfast => {
378 if let Some(dfast) = self.dfast_match_generator.as_mut() {
379 let vec_pool = &mut self.vec_pool;
380 dfast.reset(|mut data| {
381 data.resize(data.capacity(), 0);
382 vec_pool.push(data);
383 });
384 }
385 }
386 MatcherBackend::HashChain => {
387 if let Some(hc) = self.hc_match_generator.as_mut() {
388 hc.hash_table = Vec::new();
391 hc.chain_table = Vec::new();
392 let vec_pool = &mut self.vec_pool;
393 hc.reset(|mut data| {
394 data.resize(data.capacity(), 0);
395 vec_pool.push(data);
396 });
397 }
398 }
399 }
400 }
401
402 self.active_backend = params.backend;
403 self.slice_size = self.base_slice_size.min(max_window_size);
404 self.reported_window_size = max_window_size;
405 match self.active_backend {
406 MatcherBackend::Simple => {
407 let vec_pool = &mut self.vec_pool;
408 let suffix_pool = &mut self.suffix_pool;
409 self.match_generator.max_window_size = max_window_size;
410 self.match_generator.hash_fill_step = params.hash_fill_step;
411 self.match_generator.reset(|mut data, mut suffixes| {
412 data.resize(data.capacity(), 0);
413 vec_pool.push(data);
414 suffixes.slots.clear();
415 suffixes.slots.resize(suffixes.slots.capacity(), None);
416 suffix_pool.push(suffixes);
417 });
418 }
419 MatcherBackend::Dfast => {
420 let dfast = self
421 .dfast_match_generator
422 .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
423 dfast.max_window_size = max_window_size;
424 dfast.lazy_depth = params.lazy_depth;
425 dfast.set_hash_bits(if hinted {
426 dfast_hash_bits_for_window(max_window_size)
427 } else {
428 DFAST_HASH_BITS
429 });
430 let vec_pool = &mut self.vec_pool;
431 dfast.reset(|mut data| {
432 data.resize(data.capacity(), 0);
433 vec_pool.push(data);
434 });
435 }
436 MatcherBackend::HashChain => {
437 let hc = self
438 .hc_match_generator
439 .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
440 hc.max_window_size = max_window_size;
441 hc.lazy_depth = params.lazy_depth;
442 hc.configure(params.hc);
443 let vec_pool = &mut self.vec_pool;
444 hc.reset(|mut data| {
445 data.resize(data.capacity(), 0);
446 vec_pool.push(data);
447 });
448 }
449 }
450 }
451
452 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
453 match self.active_backend {
454 MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
455 MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
456 MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
457 }
458
459 if dict_content.is_empty() {
460 return;
461 }
462
463 let retained_dict_budget = dict_content.len();
466 match self.active_backend {
467 MatcherBackend::Simple => {
468 self.match_generator.max_window_size = self
469 .match_generator
470 .max_window_size
471 .saturating_add(retained_dict_budget);
472 }
473 MatcherBackend::Dfast => {
474 let matcher = self.dfast_matcher_mut();
475 matcher.max_window_size =
476 matcher.max_window_size.saturating_add(retained_dict_budget);
477 }
478 MatcherBackend::HashChain => {
479 let matcher = self.hc_matcher_mut();
480 matcher.max_window_size =
481 matcher.max_window_size.saturating_add(retained_dict_budget);
482 }
483 }
484
485 let mut start = 0usize;
486 let mut committed_dict_budget = 0usize;
487 let min_primed_tail = match self.active_backend {
491 MatcherBackend::Simple => MIN_MATCH_LEN,
492 MatcherBackend::Dfast | MatcherBackend::HashChain => 4,
493 };
494 while start < dict_content.len() {
495 let end = (start + self.slice_size).min(dict_content.len());
496 if end - start < min_primed_tail {
497 break;
498 }
499 let mut space = self.get_next_space();
500 space.clear();
501 space.extend_from_slice(&dict_content[start..end]);
502 self.commit_space(space);
503 self.skip_matching();
504 committed_dict_budget += end - start;
505 start = end;
506 }
507
508 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
509 if uncommitted_tail_budget > 0 {
510 match self.active_backend {
511 MatcherBackend::Simple => {
512 self.match_generator.max_window_size = self
513 .match_generator
514 .max_window_size
515 .saturating_sub(uncommitted_tail_budget);
516 }
517 MatcherBackend::Dfast => {
518 let matcher = self.dfast_matcher_mut();
519 matcher.max_window_size = matcher
520 .max_window_size
521 .saturating_sub(uncommitted_tail_budget);
522 }
523 MatcherBackend::HashChain => {
524 let matcher = self.hc_matcher_mut();
525 matcher.max_window_size = matcher
526 .max_window_size
527 .saturating_sub(uncommitted_tail_budget);
528 }
529 }
530 }
531 if committed_dict_budget > 0 {
532 self.dictionary_retained_budget = self
533 .dictionary_retained_budget
534 .saturating_add(committed_dict_budget);
535 }
536 }
537
538 fn window_size(&self) -> u64 {
539 self.reported_window_size as u64
540 }
541
542 fn get_next_space(&mut self) -> Vec<u8> {
543 if let Some(mut space) = self.vec_pool.pop() {
544 if space.len() > self.slice_size {
545 space.truncate(self.slice_size);
546 }
547 if space.len() < self.slice_size {
548 space.resize(self.slice_size, 0);
549 }
550 return space;
551 }
552 alloc::vec![0; self.slice_size]
553 }
554
555 fn get_last_space(&mut self) -> &[u8] {
556 match self.active_backend {
557 MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
558 MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
559 MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
560 }
561 }
562
563 fn commit_space(&mut self, space: Vec<u8>) {
564 match self.active_backend {
565 MatcherBackend::Simple => {
566 let vec_pool = &mut self.vec_pool;
567 let mut evicted_bytes = 0usize;
568 let suffixes = match self.suffix_pool.pop() {
569 Some(store) if store.slots.len() >= space.len() => store,
570 _ => SuffixStore::with_capacity(space.len()),
571 };
572 let suffix_pool = &mut self.suffix_pool;
573 self.match_generator
574 .add_data(space, suffixes, |mut data, mut suffixes| {
575 evicted_bytes += data.len();
576 data.resize(data.capacity(), 0);
577 vec_pool.push(data);
578 suffixes.slots.clear();
579 suffixes.slots.resize(suffixes.slots.capacity(), None);
580 suffix_pool.push(suffixes);
581 });
582 self.retire_dictionary_budget(evicted_bytes);
583 self.trim_after_budget_retire();
584 }
585 MatcherBackend::Dfast => {
586 let vec_pool = &mut self.vec_pool;
587 let mut evicted_bytes = 0usize;
588 self.dfast_match_generator
589 .as_mut()
590 .expect("dfast backend must be initialized by reset() before use")
591 .add_data(space, |mut data| {
592 evicted_bytes += data.len();
593 data.resize(data.capacity(), 0);
594 vec_pool.push(data);
595 });
596 self.retire_dictionary_budget(evicted_bytes);
597 self.trim_after_budget_retire();
598 }
599 MatcherBackend::HashChain => {
600 let vec_pool = &mut self.vec_pool;
601 let mut evicted_bytes = 0usize;
602 self.hc_match_generator
603 .as_mut()
604 .expect("hash chain backend must be initialized by reset() before use")
605 .add_data(space, |mut data| {
606 evicted_bytes += data.len();
607 data.resize(data.capacity(), 0);
608 vec_pool.push(data);
609 });
610 self.retire_dictionary_budget(evicted_bytes);
611 self.trim_after_budget_retire();
612 }
613 }
614 }
615
616 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
617 match self.active_backend {
618 MatcherBackend::Simple => {
619 while self.match_generator.next_sequence(&mut handle_sequence) {}
620 }
621 MatcherBackend::Dfast => self
622 .dfast_matcher_mut()
623 .start_matching(&mut handle_sequence),
624 MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
625 }
626 }
627 fn skip_matching(&mut self) {
628 match self.active_backend {
629 MatcherBackend::Simple => self.match_generator.skip_matching(),
630 MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
631 MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
632 }
633 }
634}
635
636struct SuffixStore {
639 slots: Vec<Option<NonZeroUsize>>,
643 len_log: u32,
644}
645
646impl SuffixStore {
647 fn with_capacity(capacity: usize) -> Self {
648 Self {
649 slots: alloc::vec![None; capacity],
650 len_log: capacity.ilog2(),
651 }
652 }
653
654 #[inline(always)]
655 fn insert(&mut self, suffix: &[u8], idx: usize) {
656 let key = self.key(suffix);
657 self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
658 }
659
660 #[inline(always)]
661 fn contains_key(&self, suffix: &[u8]) -> bool {
662 let key = self.key(suffix);
663 self.slots[key].is_some()
664 }
665
666 #[inline(always)]
667 fn get(&self, suffix: &[u8]) -> Option<usize> {
668 let key = self.key(suffix);
669 self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
670 }
671
672 #[inline(always)]
673 fn key(&self, suffix: &[u8]) -> usize {
674 if self.len_log == 0 {
676 return 0;
677 }
678
679 let s0 = suffix[0] as u64;
680 let s1 = suffix[1] as u64;
681 let s2 = suffix[2] as u64;
682 let s3 = suffix[3] as u64;
683 let s4 = suffix[4] as u64;
684
685 const POLY: u64 = 0xCF3BCCDCABu64;
686
687 let s0 = (s0 << 24).wrapping_mul(POLY);
688 let s1 = (s1 << 32).wrapping_mul(POLY);
689 let s2 = (s2 << 40).wrapping_mul(POLY);
690 let s3 = (s3 << 48).wrapping_mul(POLY);
691 let s4 = (s4 << 56).wrapping_mul(POLY);
692
693 let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
694 let index = index >> (64 - self.len_log);
695 index as usize % self.slots.len()
696 }
697}
698
699struct WindowEntry {
702 data: Vec<u8>,
703 suffixes: SuffixStore,
705 base_offset: usize,
707}
708
709pub(crate) struct MatchGenerator {
710 max_window_size: usize,
711 window: Vec<WindowEntry>,
714 window_size: usize,
715 #[cfg(debug_assertions)]
716 concat_window: Vec<u8>,
717 suffix_idx: usize,
719 last_idx_in_sequence: usize,
721 hash_fill_step: usize,
722 offset_hist: [u32; 3],
723}
724
725impl MatchGenerator {
726 #[inline(always)]
727 #[cfg(target_endian = "little")]
728 fn mismatch_byte_index(diff: usize) -> usize {
729 diff.trailing_zeros() as usize / 8
730 }
731
732 #[inline(always)]
733 #[cfg(target_endian = "big")]
734 fn mismatch_byte_index(diff: usize) -> usize {
735 diff.leading_zeros() as usize / 8
736 }
737
738 fn new(max_size: usize) -> Self {
740 Self {
741 max_window_size: max_size,
742 window: Vec::new(),
743 window_size: 0,
744 #[cfg(debug_assertions)]
745 concat_window: Vec::new(),
746 suffix_idx: 0,
747 last_idx_in_sequence: 0,
748 hash_fill_step: 1,
749 offset_hist: [1, 4, 8],
750 }
751 }
752
753 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
754 self.window_size = 0;
755 #[cfg(debug_assertions)]
756 self.concat_window.clear();
757 self.suffix_idx = 0;
758 self.last_idx_in_sequence = 0;
759 self.offset_hist = [1, 4, 8];
760 self.window.drain(..).for_each(|entry| {
761 reuse_space(entry.data, entry.suffixes);
762 });
763 }
764
765 fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
770 loop {
771 let last_entry = self.window.last().unwrap();
772 let data_slice = &last_entry.data;
773
774 if self.suffix_idx >= data_slice.len() {
776 if self.last_idx_in_sequence != self.suffix_idx {
777 let literals = &data_slice[self.last_idx_in_sequence..];
778 self.last_idx_in_sequence = self.suffix_idx;
779 handle_sequence(Sequence::Literals { literals });
780 return true;
781 } else {
782 return false;
783 }
784 }
785
786 let data_slice = &data_slice[self.suffix_idx..];
788 if data_slice.len() < MIN_MATCH_LEN {
789 let last_idx_in_sequence = self.last_idx_in_sequence;
790 self.last_idx_in_sequence = last_entry.data.len();
791 self.suffix_idx = last_entry.data.len();
792 handle_sequence(Sequence::Literals {
793 literals: &last_entry.data[last_idx_in_sequence..],
794 });
795 return true;
796 }
797
798 let key = &data_slice[..MIN_MATCH_LEN];
800 let literals_len = self.suffix_idx - self.last_idx_in_sequence;
801
802 let mut candidate = self.repcode_candidate(data_slice, literals_len);
804 for match_entry in self.window.iter() {
805 if let Some(match_index) = match_entry.suffixes.get(key) {
806 let match_slice = &match_entry.data[match_index..];
807
808 let match_len = Self::common_prefix_len(match_slice, data_slice);
810
811 if match_len >= MIN_MATCH_LEN {
813 let offset = match_entry.base_offset + self.suffix_idx - match_index;
814
815 #[cfg(debug_assertions)]
817 {
818 let unprocessed = last_entry.data.len() - self.suffix_idx;
819 let start = self.concat_window.len() - unprocessed - offset;
820 let end = start + match_len;
821 let check_slice = &self.concat_window[start..end];
822 debug_assert_eq!(check_slice, &match_slice[..match_len]);
823 }
824
825 if let Some((old_offset, old_match_len)) = candidate {
826 if match_len > old_match_len
827 || (match_len == old_match_len && offset < old_offset)
828 {
829 candidate = Some((offset, match_len));
830 }
831 } else {
832 candidate = Some((offset, match_len));
833 }
834 }
835 }
836 }
837
838 if let Some((offset, match_len)) = candidate {
839 self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);
842
843 let last_entry = self.window.last().unwrap();
845 let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
846
847 self.suffix_idx += match_len;
849 self.last_idx_in_sequence = self.suffix_idx;
850 let _ = encode_offset_with_history(
851 offset as u32,
852 literals.len() as u32,
853 &mut self.offset_hist,
854 );
855 handle_sequence(Sequence::Triple {
856 literals,
857 offset,
858 match_len,
859 });
860
861 return true;
862 }
863
864 let last_entry = self.window.last_mut().unwrap();
865 let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
866 if !last_entry.suffixes.contains_key(key) {
867 last_entry.suffixes.insert(key, self.suffix_idx);
868 }
869 self.suffix_idx += 1;
870 }
871 }
872
873 #[inline(always)]
875 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
876 let max = a.len().min(b.len());
877 let chunk = core::mem::size_of::<usize>();
878 let mut off = 0usize;
879 let lhs = a.as_ptr();
880 let rhs = b.as_ptr();
881
882 while off + chunk <= max {
883 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
884 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
885 let diff = lhs_word ^ rhs_word;
886 if diff != 0 {
887 return off + Self::mismatch_byte_index(diff);
888 }
889 off += chunk;
890 }
891
892 off + core::iter::zip(&a[off..max], &b[off..max])
893 .take_while(|(x, y)| x == y)
894 .count()
895 }
896
897 #[inline(always)]
899 fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
900 let start = self.suffix_idx;
901 let last_entry = self.window.last_mut().unwrap();
902 if last_entry.data.len() < MIN_MATCH_LEN {
903 return;
904 }
905 let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
906 if insert_limit > start {
907 let data = last_entry.data.as_slice();
908 let suffixes = &mut last_entry.suffixes;
909 if fill_step == FAST_HASH_FILL_STEP {
910 Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
911 } else {
912 let mut pos = start;
913 while pos < insert_limit {
914 Self::insert_suffix_if_absent(data, suffixes, pos);
915 pos += fill_step;
916 }
917 }
918 }
919
920 if idx >= start + MIN_MATCH_LEN {
921 let tail_start = idx - MIN_MATCH_LEN;
922 let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
923 if !last_entry.suffixes.contains_key(tail_key) {
924 last_entry.suffixes.insert(tail_key, tail_start);
925 }
926 }
927 }
928
929 #[inline(always)]
930 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
931 debug_assert!(
932 pos + MIN_MATCH_LEN <= data.len(),
933 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
934 pos,
935 MIN_MATCH_LEN,
936 data.len()
937 );
938 let key = &data[pos..pos + MIN_MATCH_LEN];
939 if !suffixes.contains_key(key) {
940 suffixes.insert(key, pos);
941 }
942 }
943
944 #[inline(always)]
945 fn add_suffixes_interleaved_fast(
946 data: &[u8],
947 suffixes: &mut SuffixStore,
948 start: usize,
949 insert_limit: usize,
950 ) {
951 let lane = FAST_HASH_FILL_STEP;
952 let mut pos = start;
953
954 while pos + lane * 3 < insert_limit {
957 let p0 = pos;
958 let p1 = pos + lane;
959 let p2 = pos + lane * 2;
960 let p3 = pos + lane * 3;
961
962 Self::insert_suffix_if_absent(data, suffixes, p0);
963 Self::insert_suffix_if_absent(data, suffixes, p1);
964 Self::insert_suffix_if_absent(data, suffixes, p2);
965 Self::insert_suffix_if_absent(data, suffixes, p3);
966
967 pos += lane * 4;
968 }
969
970 while pos < insert_limit {
971 Self::insert_suffix_if_absent(data, suffixes, pos);
972 pos += lane;
973 }
974 }
975
976 fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
977 if literals_len != 0 {
978 return None;
979 }
980
981 let reps = [
982 Some(self.offset_hist[1] as usize),
983 Some(self.offset_hist[2] as usize),
984 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
985 ];
986
987 let mut best: Option<(usize, usize)> = None;
988 let mut seen = [0usize; 3];
989 let mut seen_len = 0usize;
990 for offset in reps.into_iter().flatten() {
991 if offset == 0 {
992 continue;
993 }
994 if seen[..seen_len].contains(&offset) {
995 continue;
996 }
997 seen[seen_len] = offset;
998 seen_len += 1;
999
1000 let Some(match_len) = self.offset_match_len(offset, data_slice) else {
1001 continue;
1002 };
1003 if match_len < MIN_MATCH_LEN {
1004 continue;
1005 }
1006
1007 if best.is_none_or(|(old_offset, old_len)| {
1008 match_len > old_len || (match_len == old_len && offset < old_offset)
1009 }) {
1010 best = Some((offset, match_len));
1011 }
1012 }
1013 best
1014 }
1015
1016 fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
1017 if offset == 0 {
1018 return None;
1019 }
1020
1021 let last_idx = self.window.len().checked_sub(1)?;
1022 let last_entry = &self.window[last_idx];
1023 let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
1024 if offset > searchable_prefix {
1025 return None;
1026 }
1027
1028 let mut remaining = offset;
1029 let (entry_idx, match_index) = if remaining <= self.suffix_idx {
1030 (last_idx, self.suffix_idx - remaining)
1031 } else {
1032 remaining -= self.suffix_idx;
1033 let mut found = None;
1034 for entry_idx in (0..last_idx).rev() {
1035 let len = self.window[entry_idx].data.len();
1036 if remaining <= len {
1037 found = Some((entry_idx, len - remaining));
1038 break;
1039 }
1040 remaining -= len;
1041 }
1042 found?
1043 };
1044
1045 let match_entry = &self.window[entry_idx];
1046 let match_slice = &match_entry.data[match_index..];
1047
1048 Some(Self::common_prefix_len(match_slice, data_slice))
1049 }
1050
1051 fn skip_matching(&mut self) {
1053 let len = self.window.last().unwrap().data.len();
1054 self.add_suffixes_till(len, 1);
1055 self.suffix_idx = len;
1056 self.last_idx_in_sequence = len;
1057 }
1058
1059 fn add_data(
1062 &mut self,
1063 data: Vec<u8>,
1064 suffixes: SuffixStore,
1065 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
1066 ) {
1067 assert!(
1068 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
1069 );
1070 self.reserve(data.len(), reuse_space);
1071 #[cfg(debug_assertions)]
1072 self.concat_window.extend_from_slice(&data);
1073
1074 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
1075 for entry in self.window.iter_mut() {
1076 entry.base_offset += last_len;
1077 }
1078 }
1079
1080 let len = data.len();
1081 self.window.push(WindowEntry {
1082 data,
1083 suffixes,
1084 base_offset: 0,
1085 });
1086 self.window_size += len;
1087 self.suffix_idx = 0;
1088 self.last_idx_in_sequence = 0;
1089 }
1090
1091 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1094 assert!(self.max_window_size >= amount);
1095 while self.window_size + amount > self.max_window_size {
1096 let removed = self.window.remove(0);
1097 self.window_size -= removed.data.len();
1098 #[cfg(debug_assertions)]
1099 self.concat_window.drain(0..removed.data.len());
1100
1101 let WindowEntry {
1102 suffixes,
1103 data: leaked_vec,
1104 base_offset: _,
1105 } = removed;
1106 reuse_space(leaked_vec, suffixes);
1107 }
1108 }
1109}
1110
1111struct DfastMatchGenerator {
1112 max_window_size: usize,
1113 window: VecDeque<Vec<u8>>,
1114 window_size: usize,
1115 history: Vec<u8>,
1118 history_start: usize,
1119 history_abs_start: usize,
1120 offset_hist: [u32; 3],
1121 short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1122 long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1123 hash_bits: usize,
1124 lazy_depth: u8,
1126}
1127
1128#[derive(Copy, Clone)]
1129struct MatchCandidate {
1130 start: usize,
1131 offset: usize,
1132 match_len: usize,
1133}
1134
1135impl DfastMatchGenerator {
1136 fn new(max_window_size: usize) -> Self {
1137 Self {
1138 max_window_size,
1139 window: VecDeque::new(),
1140 window_size: 0,
1141 history: Vec::new(),
1142 history_start: 0,
1143 history_abs_start: 0,
1144 offset_hist: [1, 4, 8],
1145 short_hash: Vec::new(),
1146 long_hash: Vec::new(),
1147 hash_bits: DFAST_HASH_BITS,
1148 lazy_depth: 1,
1149 }
1150 }
1151
1152 fn set_hash_bits(&mut self, bits: usize) {
1153 let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1154 if self.hash_bits != clamped {
1155 self.hash_bits = clamped;
1156 self.short_hash = Vec::new();
1157 self.long_hash = Vec::new();
1158 }
1159 }
1160
1161 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1162 self.window_size = 0;
1163 self.history.clear();
1164 self.history_start = 0;
1165 self.history_abs_start = 0;
1166 self.offset_hist = [1, 4, 8];
1167 if !self.short_hash.is_empty() {
1168 self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1169 self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1170 }
1171 for mut data in self.window.drain(..) {
1172 data.resize(data.capacity(), 0);
1173 reuse_space(data);
1174 }
1175 }
1176
1177 fn get_last_space(&self) -> &[u8] {
1178 self.window.back().unwrap().as_slice()
1179 }
1180
1181 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1182 assert!(data.len() <= self.max_window_size);
1183 while self.window_size + data.len() > self.max_window_size {
1184 let removed = self.window.pop_front().unwrap();
1185 self.window_size -= removed.len();
1186 self.history_start += removed.len();
1187 self.history_abs_start += removed.len();
1188 reuse_space(removed);
1189 }
1190 self.compact_history();
1191 self.history.extend_from_slice(&data);
1192 self.window_size += data.len();
1193 self.window.push_back(data);
1194 }
1195
1196 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1197 while self.window_size > self.max_window_size {
1198 let removed = self.window.pop_front().unwrap();
1199 self.window_size -= removed.len();
1200 self.history_start += removed.len();
1201 self.history_abs_start += removed.len();
1202 reuse_space(removed);
1203 }
1204 }
1205
1206 fn skip_matching(&mut self) {
1207 self.ensure_hash_tables();
1208 let current_len = self.window.back().unwrap().len();
1209 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1210 self.insert_positions(current_abs_start, current_abs_start + current_len);
1211 }
1212
1213 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1214 self.ensure_hash_tables();
1215
1216 let current_len = self.window.back().unwrap().len();
1217 if current_len == 0 {
1218 return;
1219 }
1220
1221 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1222
1223 let mut pos = 0usize;
1224 let mut literals_start = 0usize;
1225 while pos + DFAST_MIN_MATCH_LEN <= current_len {
1226 let abs_pos = current_abs_start + pos;
1227 let lit_len = pos - literals_start;
1228
1229 let best = self.best_match(abs_pos, lit_len);
1230 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1231 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1232 let current = self.window.back().unwrap().as_slice();
1233 let start = candidate.start - current_abs_start;
1234 let literals = ¤t[literals_start..start];
1235 handle_sequence(Sequence::Triple {
1236 literals,
1237 offset: candidate.offset,
1238 match_len: candidate.match_len,
1239 });
1240 let _ = encode_offset_with_history(
1243 candidate.offset as u32,
1244 literals.len() as u32,
1245 &mut self.offset_hist,
1246 );
1247 pos = start + candidate.match_len;
1248 literals_start = pos;
1249 } else {
1250 self.insert_position(abs_pos);
1251 pos += 1;
1252 }
1253 }
1254
1255 if literals_start < current_len {
1256 let current = self.window.back().unwrap().as_slice();
1263 handle_sequence(Sequence::Literals {
1264 literals: ¤t[literals_start..],
1265 });
1266 }
1267 }
1268
1269 fn ensure_hash_tables(&mut self) {
1270 let table_len = 1usize << self.hash_bits;
1271 if self.short_hash.len() != table_len {
1272 self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1276 self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1277 }
1278 }
1279
1280 fn compact_history(&mut self) {
1281 if self.history_start == 0 {
1282 return;
1283 }
1284 if self.history_start >= self.max_window_size
1285 || self.history_start * 2 >= self.history.len()
1286 {
1287 self.history.drain(..self.history_start);
1288 self.history_start = 0;
1289 }
1290 }
1291
1292 fn live_history(&self) -> &[u8] {
1293 &self.history[self.history_start..]
1294 }
1295
1296 fn history_abs_end(&self) -> usize {
1297 self.history_abs_start + self.live_history().len()
1298 }
1299
1300 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1301 let rep = self.repcode_candidate(abs_pos, lit_len);
1302 let hash = self.hash_candidate(abs_pos, lit_len);
1303 Self::better_candidate(rep, hash)
1304 }
1305
1306 fn pick_lazy_match(
1307 &self,
1308 abs_pos: usize,
1309 lit_len: usize,
1310 best: Option<MatchCandidate>,
1311 ) -> Option<MatchCandidate> {
1312 let best = best?;
1313 if best.match_len >= DFAST_TARGET_LEN
1314 || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
1315 {
1316 return Some(best);
1317 }
1318
1319 let next = self.best_match(abs_pos + 1, lit_len + 1);
1321 if let Some(next) = next
1322 && (next.match_len > best.match_len
1323 || (next.match_len == best.match_len && next.offset < best.offset))
1324 {
1325 return None;
1326 }
1327
1328 if self.lazy_depth >= 2 && abs_pos + 2 + DFAST_MIN_MATCH_LEN <= self.history_abs_end() {
1330 let next2 = self.best_match(abs_pos + 2, lit_len + 2);
1331 if let Some(next2) = next2
1332 && next2.match_len > best.match_len + 1
1333 {
1334 return None;
1335 }
1336 }
1337
1338 Some(best)
1339 }
1340
1341 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1342 let reps = if lit_len == 0 {
1343 [
1344 Some(self.offset_hist[1] as usize),
1345 Some(self.offset_hist[2] as usize),
1346 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1347 ]
1348 } else {
1349 [
1350 Some(self.offset_hist[0] as usize),
1351 Some(self.offset_hist[1] as usize),
1352 Some(self.offset_hist[2] as usize),
1353 ]
1354 };
1355
1356 let mut best = None;
1357 for rep in reps.into_iter().flatten() {
1358 if rep == 0 || rep > abs_pos {
1359 continue;
1360 }
1361 let candidate_pos = abs_pos - rep;
1362 if candidate_pos < self.history_abs_start {
1363 continue;
1364 }
1365 let concat = self.live_history();
1366 let candidate_idx = candidate_pos - self.history_abs_start;
1367 let current_idx = abs_pos - self.history_abs_start;
1368 if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
1369 continue;
1370 }
1371 let match_len =
1372 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1373 if match_len >= DFAST_MIN_MATCH_LEN {
1374 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1375 best = Self::better_candidate(best, Some(candidate));
1376 }
1377 }
1378 best
1379 }
1380
1381 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1382 let concat = self.live_history();
1383 let current_idx = abs_pos - self.history_abs_start;
1384 let mut best = None;
1385 for candidate_pos in self.long_candidates(abs_pos) {
1386 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1387 continue;
1388 }
1389 let candidate_idx = candidate_pos - self.history_abs_start;
1390 let match_len =
1391 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1392 if match_len >= DFAST_MIN_MATCH_LEN {
1393 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1394 best = Self::better_candidate(best, Some(candidate));
1395 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1396 return best;
1397 }
1398 }
1399 }
1400
1401 for candidate_pos in self.short_candidates(abs_pos) {
1402 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1403 continue;
1404 }
1405 let candidate_idx = candidate_pos - self.history_abs_start;
1406 let match_len =
1407 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1408 if match_len >= DFAST_MIN_MATCH_LEN {
1409 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1410 best = Self::better_candidate(best, Some(candidate));
1411 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1412 return best;
1413 }
1414 }
1415 }
1416 best
1417 }
1418
1419 fn extend_backwards(
1420 &self,
1421 mut candidate_pos: usize,
1422 mut abs_pos: usize,
1423 mut match_len: usize,
1424 lit_len: usize,
1425 ) -> MatchCandidate {
1426 let concat = self.live_history();
1427 let min_abs_pos = abs_pos - lit_len;
1428 while abs_pos > min_abs_pos
1429 && candidate_pos > self.history_abs_start
1430 && concat[candidate_pos - self.history_abs_start - 1]
1431 == concat[abs_pos - self.history_abs_start - 1]
1432 {
1433 candidate_pos -= 1;
1434 abs_pos -= 1;
1435 match_len += 1;
1436 }
1437 MatchCandidate {
1438 start: abs_pos,
1439 offset: abs_pos - candidate_pos,
1440 match_len,
1441 }
1442 }
1443
1444 fn better_candidate(
1445 lhs: Option<MatchCandidate>,
1446 rhs: Option<MatchCandidate>,
1447 ) -> Option<MatchCandidate> {
1448 match (lhs, rhs) {
1449 (None, other) | (other, None) => other,
1450 (Some(lhs), Some(rhs)) => {
1451 if rhs.match_len > lhs.match_len
1452 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1453 {
1454 Some(rhs)
1455 } else {
1456 Some(lhs)
1457 }
1458 }
1459 }
1460 }
1461
1462 fn insert_positions(&mut self, start: usize, end: usize) {
1463 for pos in start..end {
1464 self.insert_position(pos);
1465 }
1466 }
1467
1468 fn insert_position(&mut self, pos: usize) {
1469 let idx = pos - self.history_abs_start;
1470 let short = {
1471 let concat = self.live_history();
1472 (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
1473 };
1474 if let Some(short) = short {
1475 let bucket = &mut self.short_hash[short];
1476 if bucket[0] != pos {
1477 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1478 bucket[0] = pos;
1479 }
1480 }
1481
1482 let long = {
1483 let concat = self.live_history();
1484 (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
1485 };
1486 if let Some(long) = long {
1487 let bucket = &mut self.long_hash[long];
1488 if bucket[0] != pos {
1489 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1490 bucket[0] = pos;
1491 }
1492 }
1493 }
1494
1495 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1496 let concat = self.live_history();
1497 let idx = pos - self.history_abs_start;
1498 (idx + 4 <= concat.len())
1499 .then(|| self.short_hash[self.hash4(&concat[idx..])])
1500 .into_iter()
1501 .flatten()
1502 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1503 }
1504
1505 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1506 let concat = self.live_history();
1507 let idx = pos - self.history_abs_start;
1508 (idx + 8 <= concat.len())
1509 .then(|| self.long_hash[self.hash8(&concat[idx..])])
1510 .into_iter()
1511 .flatten()
1512 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1513 }
1514
1515 fn hash4(&self, data: &[u8]) -> usize {
1516 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1517 self.hash_index(value)
1518 }
1519
1520 fn hash8(&self, data: &[u8]) -> usize {
1521 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1522 self.hash_index(value)
1523 }
1524
1525 fn hash_index(&self, value: u64) -> usize {
1526 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1527 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
1528 }
1529}
1530
1531struct HcMatchGenerator {
1532 max_window_size: usize,
1533 window: VecDeque<Vec<u8>>,
1534 window_size: usize,
1535 history: Vec<u8>,
1536 history_start: usize,
1537 history_abs_start: usize,
1538 offset_hist: [u32; 3],
1539 hash_table: Vec<u32>,
1540 chain_table: Vec<u32>,
1541 lazy_depth: u8,
1542 hash_log: usize,
1543 chain_log: usize,
1544 search_depth: usize,
1545 target_len: usize,
1546}
1547
1548impl HcMatchGenerator {
1549 fn new(max_window_size: usize) -> Self {
1550 Self {
1551 max_window_size,
1552 window: VecDeque::new(),
1553 window_size: 0,
1554 history: Vec::new(),
1555 history_start: 0,
1556 history_abs_start: 0,
1557 offset_hist: [1, 4, 8],
1558 hash_table: Vec::new(),
1559 chain_table: Vec::new(),
1560 lazy_depth: 2,
1561 hash_log: HC_HASH_LOG,
1562 chain_log: HC_CHAIN_LOG,
1563 search_depth: HC_SEARCH_DEPTH,
1564 target_len: HC_TARGET_LEN,
1565 }
1566 }
1567
1568 fn configure(&mut self, config: HcConfig) {
1569 let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
1570 self.hash_log = config.hash_log;
1571 self.chain_log = config.chain_log;
1572 self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
1573 self.target_len = config.target_len;
1574 if resize && !self.hash_table.is_empty() {
1575 self.hash_table.clear();
1577 self.chain_table.clear();
1578 }
1579 }
1580
1581 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1582 self.window_size = 0;
1583 self.history.clear();
1584 self.history_start = 0;
1585 self.history_abs_start = 0;
1586 self.offset_hist = [1, 4, 8];
1587 if !self.hash_table.is_empty() {
1588 self.hash_table.fill(HC_EMPTY);
1589 self.chain_table.fill(HC_EMPTY);
1590 }
1591 for mut data in self.window.drain(..) {
1592 data.resize(data.capacity(), 0);
1593 reuse_space(data);
1594 }
1595 }
1596
1597 fn get_last_space(&self) -> &[u8] {
1598 self.window.back().unwrap().as_slice()
1599 }
1600
1601 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1605 assert!(data.len() <= self.max_window_size);
1606 while self.window_size + data.len() > self.max_window_size {
1607 let removed = self.window.pop_front().unwrap();
1608 self.window_size -= removed.len();
1609 self.history_start += removed.len();
1610 self.history_abs_start += removed.len();
1611 reuse_space(removed);
1612 }
1613 self.compact_history();
1614 self.history.extend_from_slice(&data);
1615 self.window_size += data.len();
1616 self.window.push_back(data);
1617 }
1618
1619 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1620 while self.window_size > self.max_window_size {
1621 let removed = self.window.pop_front().unwrap();
1622 self.window_size -= removed.len();
1623 self.history_start += removed.len();
1624 self.history_abs_start += removed.len();
1625 reuse_space(removed);
1626 }
1627 }
1628
1629 fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
1632 let backfill_start = current_abs_start
1633 .saturating_sub(3)
1634 .max(self.history_abs_start);
1635 if backfill_start < current_abs_start {
1636 self.insert_positions(backfill_start, current_abs_start);
1637 }
1638 }
1639
1640 fn skip_matching(&mut self) {
1641 self.ensure_tables();
1642 let current_len = self.window.back().unwrap().len();
1643 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1644 self.backfill_boundary_positions(current_abs_start);
1645 self.insert_positions(current_abs_start, current_abs_start + current_len);
1646 }
1647
1648 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1649 self.ensure_tables();
1650
1651 let current_len = self.window.back().unwrap().len();
1652 if current_len == 0 {
1653 return;
1654 }
1655
1656 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1657 self.backfill_boundary_positions(current_abs_start);
1658
1659 let mut pos = 0usize;
1660 let mut literals_start = 0usize;
1661 while pos + HC_MIN_MATCH_LEN <= current_len {
1662 let abs_pos = current_abs_start + pos;
1663 let lit_len = pos - literals_start;
1664
1665 let best = self.find_best_match(abs_pos, lit_len);
1666 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1667 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1668 let current = self.window.back().unwrap().as_slice();
1669 let start = candidate.start - current_abs_start;
1670 let literals = ¤t[literals_start..start];
1671 handle_sequence(Sequence::Triple {
1672 literals,
1673 offset: candidate.offset,
1674 match_len: candidate.match_len,
1675 });
1676 let _ = encode_offset_with_history(
1677 candidate.offset as u32,
1678 literals.len() as u32,
1679 &mut self.offset_hist,
1680 );
1681 pos = start + candidate.match_len;
1682 literals_start = pos;
1683 } else {
1684 self.insert_position(abs_pos);
1685 pos += 1;
1686 }
1687 }
1688
1689 while pos + 4 <= current_len {
1692 self.insert_position(current_abs_start + pos);
1693 pos += 1;
1694 }
1695
1696 if literals_start < current_len {
1697 let current = self.window.back().unwrap().as_slice();
1698 handle_sequence(Sequence::Literals {
1699 literals: ¤t[literals_start..],
1700 });
1701 }
1702 }
1703
1704 fn ensure_tables(&mut self) {
1705 if self.hash_table.is_empty() {
1706 self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
1707 self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
1708 }
1709 }
1710
1711 fn compact_history(&mut self) {
1712 if self.history_start == 0 {
1713 return;
1714 }
1715 if self.history_start >= self.max_window_size
1716 || self.history_start * 2 >= self.history.len()
1717 {
1718 self.history.drain(..self.history_start);
1719 self.history_start = 0;
1720 }
1721 }
1722
1723 fn live_history(&self) -> &[u8] {
1724 &self.history[self.history_start..]
1725 }
1726
1727 fn history_abs_end(&self) -> usize {
1728 self.history_abs_start + self.live_history().len()
1729 }
1730
1731 fn hash_position(&self, data: &[u8]) -> usize {
1732 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1733 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1734 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
1735 }
1736
1737 fn insert_position(&mut self, abs_pos: usize) {
1738 let idx = abs_pos - self.history_abs_start;
1739 let concat = self.live_history();
1740 if idx + 4 > concat.len() {
1741 return;
1742 }
1743 let hash = self.hash_position(&concat[idx..]);
1744 if abs_pos >= u32::MAX as usize {
1749 return;
1750 }
1751 let pos_u32 = abs_pos as u32;
1752 let stored = pos_u32 + 1;
1753 let chain_idx = pos_u32 as usize & ((1 << self.chain_log) - 1);
1754 let prev = self.hash_table[hash];
1755 self.chain_table[chain_idx] = prev;
1756 self.hash_table[hash] = stored;
1757 }
1758
1759 fn insert_positions(&mut self, start: usize, end: usize) {
1760 for pos in start..end {
1761 self.insert_position(pos);
1762 }
1763 }
1764
1765 fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
1768 let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
1769 let idx = abs_pos - self.history_abs_start;
1770 let concat = self.live_history();
1771 if idx + 4 > concat.len() {
1772 return buf;
1773 }
1774 let hash = self.hash_position(&concat[idx..]);
1775 let chain_mask = (1 << self.chain_log) - 1;
1776
1777 let mut cur = self.hash_table[hash];
1778 let mut filled = 0;
1779 let mut steps = 0;
1786 let max_chain_steps = self.search_depth * 4;
1787 while filled < self.search_depth && steps < max_chain_steps {
1788 if cur == HC_EMPTY {
1789 break;
1790 }
1791 let candidate_abs = cur.wrapping_sub(1) as usize;
1792 let next = self.chain_table[candidate_abs & chain_mask];
1793 steps += 1;
1794 if next == cur {
1795 if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
1798 buf[filled] = candidate_abs;
1799 }
1800 break;
1801 }
1802 cur = next;
1803 if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
1804 continue;
1805 }
1806 buf[filled] = candidate_abs;
1807 filled += 1;
1808 }
1809 buf
1810 }
1811
1812 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1813 let rep = self.repcode_candidate(abs_pos, lit_len);
1814 let hash = self.hash_chain_candidate(abs_pos, lit_len);
1815 Self::better_candidate(rep, hash)
1816 }
1817
1818 fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1819 let concat = self.live_history();
1820 let current_idx = abs_pos - self.history_abs_start;
1821 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
1822 return None;
1823 }
1824
1825 let mut best: Option<MatchCandidate> = None;
1826 for candidate_abs in self.chain_candidates(abs_pos) {
1827 if candidate_abs == usize::MAX {
1828 break;
1829 }
1830 let candidate_idx = candidate_abs - self.history_abs_start;
1831 let match_len =
1832 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1833 if match_len >= HC_MIN_MATCH_LEN {
1834 let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
1835 best = Self::better_candidate(best, Some(candidate));
1836 if best.is_some_and(|b| b.match_len >= self.target_len) {
1837 return best;
1838 }
1839 }
1840 }
1841 best
1842 }
1843
1844 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1845 let reps = if lit_len == 0 {
1846 [
1847 Some(self.offset_hist[1] as usize),
1848 Some(self.offset_hist[2] as usize),
1849 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1850 ]
1851 } else {
1852 [
1853 Some(self.offset_hist[0] as usize),
1854 Some(self.offset_hist[1] as usize),
1855 Some(self.offset_hist[2] as usize),
1856 ]
1857 };
1858
1859 let concat = self.live_history();
1860 let current_idx = abs_pos - self.history_abs_start;
1861 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
1862 return None;
1863 }
1864
1865 let mut best = None;
1866 for rep in reps.into_iter().flatten() {
1867 if rep == 0 || rep > abs_pos {
1868 continue;
1869 }
1870 let candidate_pos = abs_pos - rep;
1871 if candidate_pos < self.history_abs_start {
1872 continue;
1873 }
1874 let candidate_idx = candidate_pos - self.history_abs_start;
1875 let match_len =
1876 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1877 if match_len >= HC_MIN_MATCH_LEN {
1878 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1879 best = Self::better_candidate(best, Some(candidate));
1880 }
1881 }
1882 best
1883 }
1884
1885 fn extend_backwards(
1886 &self,
1887 mut candidate_pos: usize,
1888 mut abs_pos: usize,
1889 mut match_len: usize,
1890 lit_len: usize,
1891 ) -> MatchCandidate {
1892 let concat = self.live_history();
1893 let min_abs_pos = abs_pos - lit_len;
1894 while abs_pos > min_abs_pos
1895 && candidate_pos > self.history_abs_start
1896 && concat[candidate_pos - self.history_abs_start - 1]
1897 == concat[abs_pos - self.history_abs_start - 1]
1898 {
1899 candidate_pos -= 1;
1900 abs_pos -= 1;
1901 match_len += 1;
1902 }
1903 MatchCandidate {
1904 start: abs_pos,
1905 offset: abs_pos - candidate_pos,
1906 match_len,
1907 }
1908 }
1909
1910 fn better_candidate(
1911 lhs: Option<MatchCandidate>,
1912 rhs: Option<MatchCandidate>,
1913 ) -> Option<MatchCandidate> {
1914 match (lhs, rhs) {
1915 (None, other) | (other, None) => other,
1916 (Some(lhs), Some(rhs)) => {
1917 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
1918 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
1919 if rhs_gain > lhs_gain {
1920 Some(rhs)
1921 } else {
1922 Some(lhs)
1923 }
1924 }
1925 }
1926 }
1927
1928 fn match_gain(match_len: usize, offset: usize) -> i32 {
1929 debug_assert!(
1930 offset > 0,
1931 "zstd offsets are 1-indexed, offset=0 is invalid"
1932 );
1933 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
1934 (match_len as i32) * 4 - offset_bits
1935 }
1936
1937 fn pick_lazy_match(
1941 &self,
1942 abs_pos: usize,
1943 lit_len: usize,
1944 best: Option<MatchCandidate>,
1945 ) -> Option<MatchCandidate> {
1946 let best = best?;
1947 if best.match_len >= self.target_len
1948 || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
1949 {
1950 return Some(best);
1951 }
1952
1953 let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
1954
1955 let next = self.find_best_match(abs_pos + 1, lit_len + 1);
1957 if let Some(next) = next {
1958 let next_gain = Self::match_gain(next.match_len, next.offset);
1959 if next_gain > current_gain {
1960 return None;
1961 }
1962 }
1963
1964 if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
1966 let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
1967 if let Some(next2) = next2 {
1968 let next2_gain = Self::match_gain(next2.match_len, next2.offset);
1969 if next2_gain > current_gain + 4 {
1971 return None;
1972 }
1973 }
1974 }
1975
1976 Some(best)
1977 }
1978}
1979
1980#[test]
1981fn matches() {
1982 let mut matcher = MatchGenerator::new(1000);
1983 let mut original_data = Vec::new();
1984 let mut reconstructed = Vec::new();
1985
1986 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
1987 Sequence::Literals { literals } => {
1988 assert!(!literals.is_empty());
1989 reconstructed.extend_from_slice(literals);
1990 }
1991 Sequence::Triple {
1992 literals,
1993 offset,
1994 match_len,
1995 } => {
1996 assert!(offset > 0);
1997 assert!(match_len >= MIN_MATCH_LEN);
1998 reconstructed.extend_from_slice(literals);
1999 assert!(offset <= reconstructed.len());
2000 let start = reconstructed.len() - offset;
2001 for i in 0..match_len {
2002 let byte = reconstructed[start + i];
2003 reconstructed.push(byte);
2004 }
2005 }
2006 };
2007
2008 matcher.add_data(
2009 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2010 SuffixStore::with_capacity(100),
2011 |_, _| {},
2012 );
2013 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
2014
2015 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2016
2017 assert!(!matcher.next_sequence(|_| {}));
2018
2019 matcher.add_data(
2020 alloc::vec![
2021 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
2022 ],
2023 SuffixStore::with_capacity(100),
2024 |_, _| {},
2025 );
2026 original_data.extend_from_slice(&[
2027 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
2028 ]);
2029
2030 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2031 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2032 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2033 assert!(!matcher.next_sequence(|_| {}));
2034
2035 matcher.add_data(
2036 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
2037 SuffixStore::with_capacity(100),
2038 |_, _| {},
2039 );
2040 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
2041
2042 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2043 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2044 assert!(!matcher.next_sequence(|_| {}));
2045
2046 matcher.add_data(
2047 alloc::vec![0, 0, 0, 0, 0],
2048 SuffixStore::with_capacity(100),
2049 |_, _| {},
2050 );
2051 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
2052
2053 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2054 assert!(!matcher.next_sequence(|_| {}));
2055
2056 matcher.add_data(
2057 alloc::vec![7, 8, 9, 10, 11],
2058 SuffixStore::with_capacity(100),
2059 |_, _| {},
2060 );
2061 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
2062
2063 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2064 assert!(!matcher.next_sequence(|_| {}));
2065
2066 matcher.add_data(
2067 alloc::vec![1, 3, 5, 7, 9],
2068 SuffixStore::with_capacity(100),
2069 |_, _| {},
2070 );
2071 matcher.skip_matching();
2072 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
2073 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
2074 assert!(!matcher.next_sequence(|_| {}));
2075
2076 matcher.add_data(
2077 alloc::vec![1, 3, 5, 7, 9],
2078 SuffixStore::with_capacity(100),
2079 |_, _| {},
2080 );
2081 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
2082
2083 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2084 assert!(!matcher.next_sequence(|_| {}));
2085
2086 matcher.add_data(
2087 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
2088 SuffixStore::with_capacity(100),
2089 |_, _| {},
2090 );
2091 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
2092
2093 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2094 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2095 assert!(!matcher.next_sequence(|_| {}));
2096
2097 assert_eq!(reconstructed, original_data);
2098}
2099
2100#[test]
2101fn dfast_matches_roundtrip_multi_block_pattern() {
2102 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
2103 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2104 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2105
2106 let mut matcher = DfastMatchGenerator::new(1 << 22);
2107 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2108 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2109 Sequence::Triple {
2110 literals,
2111 offset,
2112 match_len,
2113 } => {
2114 decoded.extend_from_slice(literals);
2115 let start = decoded.len() - offset;
2116 for i in 0..match_len {
2117 let byte = decoded[start + i];
2118 decoded.push(byte);
2119 }
2120 }
2121 };
2122
2123 matcher.add_data(first_block.clone(), |_| {});
2124 let mut history = Vec::new();
2125 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2126 assert_eq!(history, first_block);
2127
2128 matcher.add_data(second_block.clone(), |_| {});
2129 let prefix_len = history.len();
2130 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2131
2132 assert_eq!(&history[prefix_len..], second_block.as_slice());
2133}
2134
2135#[test]
2136fn driver_switches_backends_and_initializes_dfast_via_reset() {
2137 let mut driver = MatchGeneratorDriver::new(32, 2);
2138
2139 driver.reset(CompressionLevel::Default);
2140 assert_eq!(driver.window_size(), (1u64 << 22));
2141
2142 let mut first = driver.get_next_space();
2143 first[..12].copy_from_slice(b"abcabcabcabc");
2144 first.truncate(12);
2145 driver.commit_space(first);
2146 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
2147 driver.skip_matching();
2148
2149 let mut second = driver.get_next_space();
2150 second[..12].copy_from_slice(b"abcabcabcabc");
2151 second.truncate(12);
2152 driver.commit_space(second);
2153
2154 let mut reconstructed = b"abcabcabcabc".to_vec();
2155 driver.start_matching(|seq| match seq {
2156 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2157 Sequence::Triple {
2158 literals,
2159 offset,
2160 match_len,
2161 } => {
2162 reconstructed.extend_from_slice(literals);
2163 let start = reconstructed.len() - offset;
2164 for i in 0..match_len {
2165 let byte = reconstructed[start + i];
2166 reconstructed.push(byte);
2167 }
2168 }
2169 });
2170 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
2171
2172 driver.reset(CompressionLevel::Fastest);
2173 assert_eq!(driver.window_size(), (1u64 << 17));
2174}
2175
2176#[test]
2177fn driver_small_source_hint_shrinks_dfast_hash_tables() {
2178 let mut driver = MatchGeneratorDriver::new(32, 2);
2179
2180 driver.reset(CompressionLevel::Default);
2181 let mut space = driver.get_next_space();
2182 space[..12].copy_from_slice(b"abcabcabcabc");
2183 space.truncate(12);
2184 driver.commit_space(space);
2185 driver.skip_matching();
2186 let full_tables = driver.dfast_matcher().short_hash.len();
2187 assert_eq!(full_tables, 1 << DFAST_HASH_BITS);
2188
2189 driver.set_source_size_hint(1024);
2190 driver.reset(CompressionLevel::Default);
2191 let mut space = driver.get_next_space();
2192 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2193 space.truncate(12);
2194 driver.commit_space(space);
2195 driver.skip_matching();
2196 let hinted_tables = driver.dfast_matcher().short_hash.len();
2197
2198 assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
2199 assert_eq!(hinted_tables, 1 << MIN_WINDOW_LOG);
2200 assert!(
2201 hinted_tables < full_tables,
2202 "tiny source hint should reduce dfast table footprint"
2203 );
2204}
2205
2206#[test]
2207fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
2208 let mut driver = MatchGeneratorDriver::new(32, 2);
2209
2210 driver.reset(CompressionLevel::Level(2));
2211 let mut space = driver.get_next_space();
2212 space[..12].copy_from_slice(b"abcabcabcabc");
2213 space.truncate(12);
2214 driver.commit_space(space);
2215 driver.skip_matching();
2216
2217 let table_len = driver.dfast_matcher().short_hash.len();
2218 assert_eq!(
2219 table_len,
2220 1 << DFAST_HASH_BITS,
2221 "unhinted Level(2) should keep default dfast table size"
2222 );
2223}
2224
2225#[test]
2226fn simple_backend_rejects_undersized_pooled_suffix_store() {
2227 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
2228 driver.reset(CompressionLevel::Fastest);
2229
2230 driver.suffix_pool.push(SuffixStore::with_capacity(1024));
2231
2232 let mut space = driver.get_next_space();
2233 space.clear();
2234 space.resize(4096, 0xAB);
2235 driver.commit_space(space);
2236
2237 let last_suffix_slots = driver
2238 .match_generator
2239 .window
2240 .last()
2241 .expect("window entry must exist after commit")
2242 .suffixes
2243 .slots
2244 .len();
2245 assert!(
2246 last_suffix_slots >= 4096,
2247 "undersized pooled suffix store must not be reused for larger blocks"
2248 );
2249}
2250
2251#[test]
2252fn source_hint_clamps_driver_slice_size_to_window() {
2253 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
2254 driver.set_source_size_hint(1024);
2255 driver.reset(CompressionLevel::Default);
2256
2257 let window = driver.window_size() as usize;
2258 assert_eq!(window, 1024);
2259 assert_eq!(driver.slice_size, window);
2260
2261 let space = driver.get_next_space();
2262 assert_eq!(space.len(), window);
2263 driver.commit_space(space);
2264}
2265
2266#[test]
2267fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
2268 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
2269 driver.reset(CompressionLevel::Default);
2270
2271 let large = driver.get_next_space();
2272 let large_capacity = large.capacity();
2273 assert!(large_capacity >= 128 * 1024);
2274 driver.commit_space(large);
2275
2276 driver.set_source_size_hint(1024);
2277 driver.reset(CompressionLevel::Default);
2278
2279 let small = driver.get_next_space();
2280 assert_eq!(small.len(), 1024);
2281 assert!(
2282 small.capacity() >= large_capacity,
2283 "pooled buffer capacity should be preserved to avoid shrink/grow churn"
2284 );
2285}
2286
2287#[test]
2288fn driver_best_to_fastest_releases_oversized_hc_tables() {
2289 let mut driver = MatchGeneratorDriver::new(32, 2);
2290
2291 driver.reset(CompressionLevel::Best);
2293 assert_eq!(driver.window_size(), (1u64 << 24));
2294
2295 let mut space = driver.get_next_space();
2297 space[..12].copy_from_slice(b"abcabcabcabc");
2298 space.truncate(12);
2299 driver.commit_space(space);
2300 driver.skip_matching();
2301
2302 driver.reset(CompressionLevel::Fastest);
2304 assert_eq!(driver.window_size(), (1u64 << 17));
2305
2306 let hc = driver.hc_match_generator.as_ref().unwrap();
2308 assert!(
2309 hc.hash_table.is_empty(),
2310 "HC hash_table should be released after switching away from Best"
2311 );
2312 assert!(
2313 hc.chain_table.is_empty(),
2314 "HC chain_table should be released after switching away from Best"
2315 );
2316}
2317
2318#[test]
2319fn driver_better_to_best_resizes_hc_tables() {
2320 let mut driver = MatchGeneratorDriver::new(32, 2);
2321
2322 driver.reset(CompressionLevel::Better);
2324 assert_eq!(driver.window_size(), (1u64 << 23));
2325
2326 let mut space = driver.get_next_space();
2327 space[..12].copy_from_slice(b"abcabcabcabc");
2328 space.truncate(12);
2329 driver.commit_space(space);
2330 driver.skip_matching();
2331
2332 let hc = driver.hc_match_generator.as_ref().unwrap();
2333 let better_hash_len = hc.hash_table.len();
2334 let better_chain_len = hc.chain_table.len();
2335
2336 driver.reset(CompressionLevel::Best);
2338 assert_eq!(driver.window_size(), (1u64 << 24));
2339
2340 let mut space = driver.get_next_space();
2342 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2343 space.truncate(12);
2344 driver.commit_space(space);
2345 driver.skip_matching();
2346
2347 let hc = driver.hc_match_generator.as_ref().unwrap();
2348 assert!(
2349 hc.hash_table.len() > better_hash_len,
2350 "Best hash_table ({}) should be larger than Better ({})",
2351 hc.hash_table.len(),
2352 better_hash_len
2353 );
2354 assert!(
2355 hc.chain_table.len() > better_chain_len,
2356 "Best chain_table ({}) should be larger than Better ({})",
2357 hc.chain_table.len(),
2358 better_chain_len
2359 );
2360}
2361
2362#[test]
2363fn prime_with_dictionary_preserves_history_for_first_full_block() {
2364 let mut driver = MatchGeneratorDriver::new(8, 1);
2365 driver.reset(CompressionLevel::Fastest);
2366
2367 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
2368
2369 let mut space = driver.get_next_space();
2370 space.clear();
2371 space.extend_from_slice(b"abcdefgh");
2372 driver.commit_space(space);
2373
2374 let mut saw_match = false;
2375 driver.start_matching(|seq| {
2376 if let Sequence::Triple {
2377 literals,
2378 offset,
2379 match_len,
2380 } = seq
2381 && literals.is_empty()
2382 && offset == 8
2383 && match_len >= MIN_MATCH_LEN
2384 {
2385 saw_match = true;
2386 }
2387 });
2388
2389 assert!(
2390 saw_match,
2391 "first full block should still match dictionary-primed history"
2392 );
2393}
2394
2395#[test]
2396fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
2397 let mut driver = MatchGeneratorDriver::new(8, 1);
2398 driver.reset(CompressionLevel::Fastest);
2399
2400 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2401
2402 let mut space = driver.get_next_space();
2403 space.clear();
2404 space.extend_from_slice(b"abcdefgh");
2405 driver.commit_space(space);
2406
2407 let mut saw_match = false;
2408 driver.start_matching(|seq| {
2409 if let Sequence::Triple {
2410 literals,
2411 offset,
2412 match_len,
2413 } = seq
2414 && literals.is_empty()
2415 && offset == 24
2416 && match_len >= MIN_MATCH_LEN
2417 {
2418 saw_match = true;
2419 }
2420 });
2421
2422 assert!(
2423 saw_match,
2424 "dictionary bytes should remain addressable until frame output exceeds the live window"
2425 );
2426}
2427
2428#[test]
2429fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
2430 let mut driver = MatchGeneratorDriver::new(8, 1);
2431 driver.reset(CompressionLevel::Fastest);
2432
2433 driver.prime_with_dictionary(&[], [11, 7, 3]);
2434
2435 assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
2436}
2437
2438#[test]
2439fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
2440 let mut driver = MatchGeneratorDriver::new(8, 1);
2441 driver.reset(CompressionLevel::Default);
2442
2443 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
2444
2445 let mut space = driver.get_next_space();
2446 space.clear();
2447 space.extend_from_slice(b"abcdefgh");
2448 driver.commit_space(space);
2449
2450 let mut saw_match = false;
2451 driver.start_matching(|seq| {
2452 if let Sequence::Triple {
2453 literals,
2454 offset,
2455 match_len,
2456 } = seq
2457 && literals.is_empty()
2458 && offset == 8
2459 && match_len >= DFAST_MIN_MATCH_LEN
2460 {
2461 saw_match = true;
2462 }
2463 });
2464
2465 assert!(
2466 saw_match,
2467 "dfast backend should match dictionary-primed history in first full block"
2468 );
2469}
2470
2471#[test]
2472fn prime_with_dictionary_does_not_inflate_reported_window_size() {
2473 let mut driver = MatchGeneratorDriver::new(8, 1);
2474 driver.reset(CompressionLevel::Fastest);
2475
2476 let before = driver.window_size();
2477 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2478 let after = driver.window_size();
2479
2480 assert_eq!(
2481 after, before,
2482 "dictionary retention budget must not change reported frame window size"
2483 );
2484}
2485
2486#[test]
2487fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
2488 let mut driver = MatchGeneratorDriver::new(8, 2);
2489 driver.reset(CompressionLevel::Fastest);
2490
2491 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
2494
2495 assert!(
2496 driver
2497 .match_generator
2498 .window
2499 .iter()
2500 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
2501 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
2502 );
2503}
2504
2505#[test]
2506fn prime_with_dictionary_counts_only_committed_tail_budget() {
2507 let mut driver = MatchGeneratorDriver::new(8, 1);
2508 driver.reset(CompressionLevel::Fastest);
2509
2510 let before = driver.match_generator.max_window_size;
2511 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
2513
2514 assert_eq!(
2515 driver.match_generator.max_window_size,
2516 before + 8,
2517 "retention budget must account only for dictionary bytes actually committed to history"
2518 );
2519}
2520
2521#[test]
2522fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
2523 let mut driver = MatchGeneratorDriver::new(8, 1);
2524 driver.reset(CompressionLevel::Default);
2525
2526 let before = driver.dfast_matcher().max_window_size;
2527 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
2530
2531 assert_eq!(
2532 driver.dfast_matcher().max_window_size,
2533 before + 12,
2534 "dfast retention budget should include 4-byte dictionary tails"
2535 );
2536}
2537
2538#[test]
2539fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
2540 let mut driver = MatchGeneratorDriver::new(8, 1);
2541 driver.reset(CompressionLevel::Fastest);
2542 driver.match_generator.max_window_size = 8;
2545 driver.reported_window_size = 8;
2546
2547 let base_window = driver.match_generator.max_window_size;
2548 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2549 assert_eq!(driver.match_generator.max_window_size, base_window + 24);
2550
2551 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
2552 let mut space = driver.get_next_space();
2553 space.clear();
2554 space.extend_from_slice(block);
2555 driver.commit_space(space);
2556 driver.skip_matching();
2557 }
2558
2559 assert_eq!(
2560 driver.dictionary_retained_budget, 0,
2561 "dictionary budget should be fully retired once primed dict slices are evicted"
2562 );
2563 assert_eq!(
2564 driver.match_generator.max_window_size, base_window,
2565 "retired dictionary budget must not remain reusable for live history"
2566 );
2567}
2568
2569#[test]
2570fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
2571 let mut driver = MatchGeneratorDriver::new(8, 1);
2572 driver.reset(CompressionLevel::Default);
2573 driver.dfast_matcher_mut().max_window_size = 8;
2576 driver.reported_window_size = 8;
2577
2578 let base_window = driver.dfast_matcher().max_window_size;
2579 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2580 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
2581
2582 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
2583 let mut space = driver.get_next_space();
2584 space.clear();
2585 space.extend_from_slice(block);
2586 driver.commit_space(space);
2587 driver.skip_matching();
2588 }
2589
2590 assert_eq!(
2591 driver.dictionary_retained_budget, 0,
2592 "dictionary budget should be fully retired once primed dict slices are evicted"
2593 );
2594 assert_eq!(
2595 driver.dfast_matcher().max_window_size,
2596 base_window,
2597 "retired dictionary budget must not remain reusable for live history"
2598 );
2599}
2600
2601#[test]
2602fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
2603 let mut driver = MatchGeneratorDriver::new(8, 1);
2604 driver.reset(CompressionLevel::Better);
2605
2606 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
2607
2608 let mut space = driver.get_next_space();
2609 space.clear();
2610 space.extend_from_slice(b"abcdefgh");
2613 driver.commit_space(space);
2614
2615 let mut saw_match = false;
2616 driver.start_matching(|seq| {
2617 if let Sequence::Triple {
2618 literals,
2619 offset,
2620 match_len,
2621 } = seq
2622 && literals.is_empty()
2623 && offset == 8
2624 && match_len >= HC_MIN_MATCH_LEN
2625 {
2626 saw_match = true;
2627 }
2628 });
2629
2630 assert!(
2631 saw_match,
2632 "hash-chain backend should match dictionary-primed history in first full block"
2633 );
2634}
2635
2636#[test]
2637fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
2638 let mut driver = MatchGeneratorDriver::new(8, 1);
2639 driver.reset(CompressionLevel::Better);
2640 driver.hc_matcher_mut().max_window_size = 8;
2642 driver.reported_window_size = 8;
2643
2644 let base_window = driver.hc_matcher().max_window_size;
2645 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2646 assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);
2647
2648 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
2649 let mut space = driver.get_next_space();
2650 space.clear();
2651 space.extend_from_slice(block);
2652 driver.commit_space(space);
2653 driver.skip_matching();
2654 }
2655
2656 assert_eq!(
2657 driver.dictionary_retained_budget, 0,
2658 "dictionary budget should be fully retired once primed dict slices are evicted"
2659 );
2660 assert_eq!(
2661 driver.hc_matcher().max_window_size,
2662 base_window,
2663 "retired dictionary budget must not remain reusable for live history"
2664 );
2665}
2666
2667#[test]
2668fn suffix_store_with_single_slot_does_not_panic_on_keying() {
2669 let mut suffixes = SuffixStore::with_capacity(1);
2670 suffixes.insert(b"abcde", 0);
2671 assert!(suffixes.contains_key(b"abcde"));
2672 assert_eq!(suffixes.get(b"abcde"), Some(0));
2673}
2674
2675#[test]
2676fn fastest_reset_uses_interleaved_hash_fill_step() {
2677 let mut driver = MatchGeneratorDriver::new(32, 2);
2678
2679 driver.reset(CompressionLevel::Uncompressed);
2680 assert_eq!(driver.match_generator.hash_fill_step, 1);
2681
2682 driver.reset(CompressionLevel::Fastest);
2683 assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);
2684
2685 driver.reset(CompressionLevel::Better);
2688 assert_eq!(driver.active_backend, MatcherBackend::HashChain);
2689 assert_eq!(driver.window_size(), (1u64 << 23));
2690 assert_eq!(driver.hc_matcher().lazy_depth, 2);
2691}
2692
2693#[test]
2694fn simple_matcher_updates_offset_history_after_emitting_match() {
2695 let mut matcher = MatchGenerator::new(64);
2696 matcher.add_data(
2697 b"abcdeabcdeabcde".to_vec(),
2698 SuffixStore::with_capacity(64),
2699 |_, _| {},
2700 );
2701
2702 assert!(matcher.next_sequence(|seq| {
2703 assert_eq!(
2704 seq,
2705 Sequence::Triple {
2706 literals: b"abcde",
2707 offset: 5,
2708 match_len: 10,
2709 }
2710 );
2711 }));
2712 assert_eq!(matcher.offset_hist, [5, 1, 4]);
2713}
2714
2715#[test]
2716fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
2717 let mut matcher = MatchGenerator::new(64);
2718 matcher.add_data(
2719 b"abcdefghijabcdefghij".to_vec(),
2720 SuffixStore::with_capacity(64),
2721 |_, _| {},
2722 );
2723
2724 matcher.suffix_idx = 10;
2725 matcher.last_idx_in_sequence = 10;
2726 matcher.offset_hist = [99, 10, 4];
2727
2728 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
2729 assert_eq!(candidate, Some((10, 10)));
2730}
2731
2732#[test]
2733fn simple_matcher_repcode_can_target_previous_window_entry() {
2734 let mut matcher = MatchGenerator::new(64);
2735 matcher.add_data(
2736 b"abcdefghij".to_vec(),
2737 SuffixStore::with_capacity(64),
2738 |_, _| {},
2739 );
2740 matcher.skip_matching();
2741 matcher.add_data(
2742 b"abcdefghij".to_vec(),
2743 SuffixStore::with_capacity(64),
2744 |_, _| {},
2745 );
2746
2747 matcher.offset_hist = [99, 10, 4];
2748
2749 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
2750 assert_eq!(candidate, Some((10, 10)));
2751}
2752
2753#[test]
2754fn simple_matcher_zero_literal_repcode_checks_rep2() {
2755 let mut matcher = MatchGenerator::new(64);
2756 matcher.add_data(
2757 b"abcdefghijabcdefghij".to_vec(),
2758 SuffixStore::with_capacity(64),
2759 |_, _| {},
2760 );
2761 matcher.suffix_idx = 10;
2762 matcher.last_idx_in_sequence = 10;
2763 matcher.offset_hist = [99, 4, 10];
2765
2766 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
2767 assert_eq!(candidate, Some((10, 10)));
2768}
2769
2770#[test]
2771fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
2772 let mut matcher = MatchGenerator::new(64);
2773 matcher.add_data(
2774 b"abcdefghijabcdefghij".to_vec(),
2775 SuffixStore::with_capacity(64),
2776 |_, _| {},
2777 );
2778 matcher.suffix_idx = 10;
2779 matcher.last_idx_in_sequence = 10;
2780 matcher.offset_hist = [11, 4, 99];
2782
2783 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
2784 assert_eq!(candidate, Some((10, 10)));
2785}
2786
2787#[test]
2788fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
2789 let mut matcher = MatchGenerator::new(64);
2790 matcher.add_data(
2791 b"abcdefghij".to_vec(),
2792 SuffixStore::with_capacity(64),
2793 |_, _| {},
2794 );
2795 matcher.skip_matching();
2796 matcher.add_data(
2797 b"klmnopqrst".to_vec(),
2798 SuffixStore::with_capacity(64),
2799 |_, _| {},
2800 );
2801 matcher.suffix_idx = 3;
2802
2803 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
2804 assert_eq!(candidate, None);
2805}
2806
2807#[test]
2808fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
2809 let mut matcher = MatchGenerator::new(64);
2810 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2811 matcher.add_data(
2812 b"abcdefghijklmnop".to_vec(),
2813 SuffixStore::with_capacity(64),
2814 |_, _| {},
2815 );
2816 matcher.skip_matching();
2817 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
2818
2819 assert!(matcher.next_sequence(|seq| {
2820 assert_eq!(
2821 seq,
2822 Sequence::Triple {
2823 literals: b"",
2824 offset: 15,
2825 match_len: 5,
2826 }
2827 );
2828 }));
2829 assert!(!matcher.next_sequence(|_| {}));
2830}
2831
2832#[test]
2833fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
2834 let mut matcher = MatchGenerator::new(64);
2835 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2836 matcher.add_data(
2837 b"01234abcde".to_vec(),
2838 SuffixStore::with_capacity(64),
2839 |_, _| {},
2840 );
2841 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
2842
2843 let last = matcher.window.last().unwrap();
2844 let tail = &last.data[5..10];
2845 assert_eq!(last.suffixes.get(tail), Some(5));
2846}
2847
2848#[test]
2849fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
2850 let mut matcher = MatchGenerator::new(128);
2851 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2852 matcher.add_data(
2853 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
2854 SuffixStore::with_capacity(1 << 16),
2855 |_, _| {},
2856 );
2857
2858 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
2859
2860 let last = matcher.window.last().unwrap();
2861 let first_key = &last.data[..MIN_MATCH_LEN];
2862 assert_eq!(last.suffixes.get(first_key), None);
2863}
2864
2865#[test]
2866fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
2867 let mut matcher = MatchGenerator::new(128);
2868 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2869 matcher.add_data(
2870 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
2871 SuffixStore::with_capacity(1 << 16),
2872 |_, _| {},
2873 );
2874
2875 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
2876
2877 let last = matcher.window.last().unwrap();
2878 for pos in [0usize, 3, 6, 9, 12] {
2879 let key = &last.data[pos..pos + MIN_MATCH_LEN];
2880 assert_eq!(
2881 last.suffixes.get(key),
2882 Some(pos),
2883 "expected interleaved suffix registration at pos {pos}"
2884 );
2885 }
2886}
2887
2888#[test]
2889fn dfast_skip_matching_handles_window_eviction() {
2890 let mut matcher = DfastMatchGenerator::new(16);
2891
2892 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
2893 matcher.skip_matching();
2894 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
2895 matcher.skip_matching();
2896 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
2897
2898 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
2899 matcher.start_matching(|seq| match seq {
2900 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2901 Sequence::Triple {
2902 literals,
2903 offset,
2904 match_len,
2905 } => {
2906 reconstructed.extend_from_slice(literals);
2907 let start = reconstructed.len() - offset;
2908 for i in 0..match_len {
2909 let byte = reconstructed[start + i];
2910 reconstructed.push(byte);
2911 }
2912 }
2913 });
2914
2915 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
2916}
2917
2918#[test]
2919fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
2920 let mut matcher = DfastMatchGenerator::new(8);
2921
2922 let mut first = Vec::with_capacity(64);
2923 first.extend_from_slice(b"abcdefgh");
2924 matcher.add_data(first, |_| {});
2925
2926 let mut second = Vec::with_capacity(64);
2927 second.extend_from_slice(b"ijklmnop");
2928
2929 let mut observed_evicted_len = None;
2930 matcher.add_data(second, |data| {
2931 observed_evicted_len = Some(data.len());
2932 });
2933
2934 assert_eq!(
2935 observed_evicted_len,
2936 Some(8),
2937 "eviction callback must report evicted byte length, not backing capacity"
2938 );
2939}
2940
2941#[test]
2942fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
2943 let mut matcher = DfastMatchGenerator::new(16);
2944
2945 let mut first = Vec::with_capacity(64);
2946 first.extend_from_slice(b"abcdefgh");
2947 matcher.add_data(first, |_| {});
2948
2949 let mut second = Vec::with_capacity(64);
2950 second.extend_from_slice(b"ijklmnop");
2951 matcher.add_data(second, |_| {});
2952
2953 matcher.max_window_size = 8;
2954
2955 let mut observed_evicted_len = None;
2956 matcher.trim_to_window(|data| {
2957 observed_evicted_len = Some(data.len());
2958 });
2959
2960 assert_eq!(
2961 observed_evicted_len,
2962 Some(8),
2963 "trim callback must report evicted byte length, not backing capacity"
2964 );
2965}
2966
2967#[test]
2968fn dfast_inserts_tail_positions_for_next_block_matching() {
2969 let mut matcher = DfastMatchGenerator::new(1 << 22);
2970
2971 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
2972 let mut history = Vec::new();
2973 matcher.start_matching(|seq| match seq {
2974 Sequence::Literals { literals } => history.extend_from_slice(literals),
2975 Sequence::Triple { .. } => unreachable!("first block should not match history"),
2976 });
2977 assert_eq!(history, b"012345bcdea");
2978
2979 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
2980 let mut saw_first_sequence = false;
2981 matcher.start_matching(|seq| {
2982 assert!(!saw_first_sequence, "expected a single cross-block match");
2983 saw_first_sequence = true;
2984 match seq {
2985 Sequence::Literals { .. } => {
2986 panic!("expected tail-anchored cross-block match before any literals")
2987 }
2988 Sequence::Triple {
2989 literals,
2990 offset,
2991 match_len,
2992 } => {
2993 assert_eq!(literals, b"");
2994 assert_eq!(offset, 5);
2995 assert_eq!(match_len, 11);
2996 let start = history.len() - offset;
2997 for i in 0..match_len {
2998 let byte = history[start + i];
2999 history.push(byte);
3000 }
3001 }
3002 }
3003 });
3004
3005 assert!(
3006 saw_first_sequence,
3007 "expected tail-anchored cross-block match"
3008 );
3009 assert_eq!(history, b"012345bcdeabcdeabcdeab");
3010}