1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
// --- Simple (suffix-table) matcher tuning ---
/// Shortest match the simple matcher emits; also the length of the suffix key.
const MIN_MATCH_LEN: usize = 5;
/// Stride used by the unrolled fast path when bulk-seeding the suffix table.
const FAST_HASH_FILL_STEP: usize = 3;
// --- Double-fast / row matcher tuning ---
// NOTE(review): the dfast/row consumers live outside this chunk; meanings
// below are inferred from the names — confirm against those implementations.
const DFAST_MIN_MATCH_LEN: usize = 6;
const ROW_MIN_MATCH_LEN: usize = 6;
const DFAST_TARGET_LEN: usize = 48;
/// Default (un-hinted) dfast hash-table size in bits; shrunk for small
/// sources by `dfast_hash_bits_for_window`.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
/// Sentinel marking an unused dfast table slot.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
const ROW_HASH_BITS: usize = 20;
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
const ROW_TAG_BITS: usize = 8;
/// Sentinel marking an unused row-table slot.
const ROW_EMPTY_SLOT: usize = usize::MAX;
const ROW_HASH_KEY_LEN: usize = 4;

// --- Hash-chain matcher tuning (defaults; per-level overrides live in
// `LEVEL_TABLE`) ---
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
/// Sentinel marking an empty hash-chain head/link.
const HC_EMPTY: u32 = 0;

/// Upper bound on any level's hash-chain search depth.
const MAX_HC_SEARCH_DEPTH: usize = 32;
/// Tuning knobs for the hash-chain matcher backend.
#[derive(Copy, Clone)]
struct HcConfig {
    // log2 of the hash-table size.
    hash_log: usize,
    // log2 of the chain-table size (how far back chains can reach).
    chain_log: usize,
    // Maximum chain links followed per search.
    search_depth: usize,
    // Match length at which the search stops early, treating it as
    // "good enough".
    target_len: usize,
}
58
/// Tuning knobs for the row-based matcher backend.
#[derive(Copy, Clone)]
struct RowConfig {
    // Number of hash bits addressing rows.
    hash_bits: usize,
    // log2 of the number of entries per row.
    row_log: usize,
    // Maximum candidates probed per search.
    search_depth: usize,
    // Match length at which the search stops early.
    target_len: usize,
}
66
/// Default hash-chain configuration, assembled from the `HC_*` constants.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};
73
/// Aggressive hash-chain configuration used by `CompressionLevel::Best`
/// (level 11 in `LEVEL_TABLE`): bigger tables, full search depth.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};
81
/// Default row-matcher configuration, assembled from the `ROW_*` constants.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
88
/// Complete parameter set for one compression level: which backend to use and
/// how to tune it. Unused backend configs carry defaults so values stay
/// `Copy`-able.
#[derive(Copy, Clone)]
struct LevelParams {
    backend: MatcherBackend,
    // log2 of the match window size in bytes.
    window_log: u8,
    // Stride for seeding the simple matcher's suffix table (>1 trades ratio
    // for speed; negative levels widen this further).
    hash_fill_step: usize,
    // Lazy-matching look-ahead depth for the dfast/row/hc backends.
    lazy_depth: u8,
    hc: HcConfig,
    row: RowConfig,
}
99
100fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
101 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
102 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
103}
104
105fn row_hash_bits_for_window(max_window_size: usize) -> usize {
106 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
107 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
108}
109
/// Parameters for compression levels 1..=22 (index = level - 1).
/// Level 1 uses the simple matcher, 2-3 dfast, 4 the row matcher, and 5+ the
/// hash chain with progressively larger tables, deeper searches, and wider
/// windows. Levels 17-22 currently share level 17's parameters.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG }, // level 1
    LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG }, // level 2
    LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG }, // level 3
    LevelParams { backend: MatcherBackend::Row, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG }, // level 4
    LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG }, // level 5
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG }, // level 6
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG }, // level 7
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG }, // level 8
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG }, // level 9
    LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG }, // level 10
    LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG }, // level 11 (Best)
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG }, // level 12
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG }, // level 13
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG }, // level 14
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG }, // level 15
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 16
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 17
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 18
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 19
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 20
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 21
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG }, // level 22
];
145
/// Absolute lower bound on the window log.
const MIN_WINDOW_LOG: u8 = 10;
/// Lower bound applied when a source-size hint is available, so tiny hints
/// do not shrink tables below a useful size.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
154
155fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
165 let src_log = if src_size == 0 {
170 MIN_WINDOW_LOG
171 } else {
172 (64 - (src_size - 1).leading_zeros()) as u8 };
174 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
175 if src_log < params.window_log {
176 params.window_log = src_log;
177 }
178 if params.backend == MatcherBackend::HashChain {
182 if (src_log + 2) < params.hc.hash_log as u8 {
183 params.hc.hash_log = (src_log + 2) as usize;
184 }
185 if (src_log + 1) < params.hc.chain_log as u8 {
186 params.hc.chain_log = (src_log + 1) as usize;
187 }
188 } else if params.backend == MatcherBackend::Row {
189 let max_window_size = 1usize << params.window_log;
190 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
191 }
192 params
193}
194
195fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
198 let params = match level {
199 CompressionLevel::Uncompressed => LevelParams {
200 backend: MatcherBackend::Simple,
201 window_log: 17,
202 hash_fill_step: 1,
203 lazy_depth: 0,
204 hc: HC_CONFIG,
205 row: ROW_CONFIG,
206 },
207 CompressionLevel::Fastest => LEVEL_TABLE[0],
208 CompressionLevel::Default => LEVEL_TABLE[2],
209 CompressionLevel::Better => LEVEL_TABLE[6],
210 CompressionLevel::Best => LEVEL_TABLE[10],
211 CompressionLevel::Level(n) => {
212 if n > 0 {
213 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
214 LEVEL_TABLE[idx]
215 } else if n == 0 {
216 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
218 } else {
219 let acceleration =
223 (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
224 let step = (acceleration + 3).min(128);
225 LevelParams {
226 backend: MatcherBackend::Simple,
227 window_log: 17,
228 hash_fill_step: step,
229 lazy_depth: 0,
230 hc: HC_CONFIG,
231 row: ROW_CONFIG,
232 }
233 }
234 }
235 };
236 if let Some(size) = source_size {
237 adjust_params_for_source_size(params, size)
238 } else {
239 params
240 }
241}
242
/// The match-finding strategy a compression level selects.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    // Suffix-table matcher; cheapest, used for level 1 and negative levels.
    Simple,
    // Double-fast matcher (levels 2-3).
    Dfast,
    // Row-based matcher (level 4).
    Row,
    // Hash-chain matcher (levels 5 and up).
    HashChain,
}
250
/// Facade that owns one matcher per backend, switches between them on
/// `reset(level)`, and recycles data buffers / suffix stores through pools so
/// backend switches do not thrash the allocator.
pub struct MatchGeneratorDriver {
    // Recycled data buffers, shared by all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores (simple backend only).
    suffix_pool: Vec<SuffixStore>,
    match_generator: MatchGenerator,
    // Lazily created on first activation of the respective backend.
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    active_backend: MatcherBackend,
    // Current slice size, capped at the active window size.
    slice_size: usize,
    // Slice size as configured at construction, before any window capping.
    base_slice_size: usize,
    // Window size advertised to callers via `window_size()`.
    reported_window_size: usize,
    // Bytes of dictionary content still counted into the active matcher's
    // window budget; paid back as dictionary data gets evicted.
    dictionary_retained_budget: usize,
    // One-shot uncompressed-size hint, consumed by the next `reset()`.
    source_size_hint: Option<u64>,
}
271
impl MatchGeneratorDriver {
    /// Builds a driver starting on the simple backend; `reset()` selects the
    /// real backend for the requested level.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            row_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Thin wrapper over `resolve_level_params` (kept as an associated fn so
    /// `reset()` can call it while borrowing `self`).
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    // Accessors below panic if the corresponding backend was never activated;
    // `reset()` is responsible for creating the generator before use.

    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Pays back up to `evicted_bytes` of the dictionary window budget:
    /// as primed dictionary data leaves the window, the temporary window
    /// enlargement from `prime_with_dictionary` is undone.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// After the window shrank in `retire_dictionary_budget`, keeps evicting
    /// until the matcher's content fits the reduced window again. Each
    /// eviction may shrink the budget (and thus the window) further, hence
    /// the loop; it ends once a pass evicts nothing.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) presumably only evicts out-of-window entries
                    // (no new space requested) — defined past this chunk.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
}
420
421impl Matcher for MatchGeneratorDriver {
422 fn supports_dictionary_priming(&self) -> bool {
423 true
424 }
425
426 fn set_source_size_hint(&mut self, size: u64) {
427 self.source_size_hint = Some(size);
428 }
429
430 fn reset(&mut self, level: CompressionLevel) {
431 let hint = self.source_size_hint.take();
432 let hinted = hint.is_some();
433 let params = Self::level_params(level, hint);
434 let max_window_size = 1usize << params.window_log;
435 self.dictionary_retained_budget = 0;
436 if self.active_backend != params.backend {
437 match self.active_backend {
438 MatcherBackend::Simple => {
439 let vec_pool = &mut self.vec_pool;
440 let suffix_pool = &mut self.suffix_pool;
441 self.match_generator.reset(|mut data, mut suffixes| {
442 data.resize(data.capacity(), 0);
443 vec_pool.push(data);
444 suffixes.slots.clear();
445 suffixes.slots.resize(suffixes.slots.capacity(), None);
446 suffix_pool.push(suffixes);
447 });
448 }
449 MatcherBackend::Dfast => {
450 if let Some(dfast) = self.dfast_match_generator.as_mut() {
451 let vec_pool = &mut self.vec_pool;
452 dfast.reset(|mut data| {
453 data.resize(data.capacity(), 0);
454 vec_pool.push(data);
455 });
456 }
457 }
458 MatcherBackend::Row => {
459 if let Some(row) = self.row_match_generator.as_mut() {
460 row.row_heads = Vec::new();
461 row.row_positions = Vec::new();
462 row.row_tags = Vec::new();
463 let vec_pool = &mut self.vec_pool;
464 row.reset(|mut data| {
465 data.resize(data.capacity(), 0);
466 vec_pool.push(data);
467 });
468 }
469 }
470 MatcherBackend::HashChain => {
471 if let Some(hc) = self.hc_match_generator.as_mut() {
472 hc.hash_table = Vec::new();
475 hc.chain_table = Vec::new();
476 let vec_pool = &mut self.vec_pool;
477 hc.reset(|mut data| {
478 data.resize(data.capacity(), 0);
479 vec_pool.push(data);
480 });
481 }
482 }
483 }
484 }
485
486 self.active_backend = params.backend;
487 self.slice_size = self.base_slice_size.min(max_window_size);
488 self.reported_window_size = max_window_size;
489 match self.active_backend {
490 MatcherBackend::Simple => {
491 let vec_pool = &mut self.vec_pool;
492 let suffix_pool = &mut self.suffix_pool;
493 self.match_generator.max_window_size = max_window_size;
494 self.match_generator.hash_fill_step = params.hash_fill_step;
495 self.match_generator.reset(|mut data, mut suffixes| {
496 data.resize(data.capacity(), 0);
497 vec_pool.push(data);
498 suffixes.slots.clear();
499 suffixes.slots.resize(suffixes.slots.capacity(), None);
500 suffix_pool.push(suffixes);
501 });
502 }
503 MatcherBackend::Dfast => {
504 let dfast = self
505 .dfast_match_generator
506 .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
507 dfast.max_window_size = max_window_size;
508 dfast.lazy_depth = params.lazy_depth;
509 dfast.set_hash_bits(if hinted {
510 dfast_hash_bits_for_window(max_window_size)
511 } else {
512 DFAST_HASH_BITS
513 });
514 let vec_pool = &mut self.vec_pool;
515 dfast.reset(|mut data| {
516 data.resize(data.capacity(), 0);
517 vec_pool.push(data);
518 });
519 }
520 MatcherBackend::Row => {
521 let row = self
522 .row_match_generator
523 .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
524 row.max_window_size = max_window_size;
525 row.lazy_depth = params.lazy_depth;
526 row.configure(params.row);
527 if hinted {
528 row.set_hash_bits(row_hash_bits_for_window(max_window_size));
529 }
530 let vec_pool = &mut self.vec_pool;
531 row.reset(|mut data| {
532 data.resize(data.capacity(), 0);
533 vec_pool.push(data);
534 });
535 }
536 MatcherBackend::HashChain => {
537 let hc = self
538 .hc_match_generator
539 .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
540 hc.max_window_size = max_window_size;
541 hc.lazy_depth = params.lazy_depth;
542 hc.configure(params.hc);
543 let vec_pool = &mut self.vec_pool;
544 hc.reset(|mut data| {
545 data.resize(data.capacity(), 0);
546 vec_pool.push(data);
547 });
548 }
549 }
550 }
551
552 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
553 match self.active_backend {
554 MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
555 MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
556 MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
557 MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
558 }
559
560 if dict_content.is_empty() {
561 return;
562 }
563
564 let retained_dict_budget = dict_content.len();
567 match self.active_backend {
568 MatcherBackend::Simple => {
569 self.match_generator.max_window_size = self
570 .match_generator
571 .max_window_size
572 .saturating_add(retained_dict_budget);
573 }
574 MatcherBackend::Dfast => {
575 let matcher = self.dfast_matcher_mut();
576 matcher.max_window_size =
577 matcher.max_window_size.saturating_add(retained_dict_budget);
578 }
579 MatcherBackend::Row => {
580 let matcher = self.row_matcher_mut();
581 matcher.max_window_size =
582 matcher.max_window_size.saturating_add(retained_dict_budget);
583 }
584 MatcherBackend::HashChain => {
585 let matcher = self.hc_matcher_mut();
586 matcher.max_window_size =
587 matcher.max_window_size.saturating_add(retained_dict_budget);
588 }
589 }
590
591 let mut start = 0usize;
592 let mut committed_dict_budget = 0usize;
593 let min_primed_tail = match self.active_backend {
597 MatcherBackend::Simple => MIN_MATCH_LEN,
598 MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
599 };
600 while start < dict_content.len() {
601 let end = (start + self.slice_size).min(dict_content.len());
602 if end - start < min_primed_tail {
603 break;
604 }
605 let mut space = self.get_next_space();
606 space.clear();
607 space.extend_from_slice(&dict_content[start..end]);
608 self.commit_space(space);
609 self.skip_matching();
610 committed_dict_budget += end - start;
611 start = end;
612 }
613
614 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
615 if uncommitted_tail_budget > 0 {
616 match self.active_backend {
617 MatcherBackend::Simple => {
618 self.match_generator.max_window_size = self
619 .match_generator
620 .max_window_size
621 .saturating_sub(uncommitted_tail_budget);
622 }
623 MatcherBackend::Dfast => {
624 let matcher = self.dfast_matcher_mut();
625 matcher.max_window_size = matcher
626 .max_window_size
627 .saturating_sub(uncommitted_tail_budget);
628 }
629 MatcherBackend::Row => {
630 let matcher = self.row_matcher_mut();
631 matcher.max_window_size = matcher
632 .max_window_size
633 .saturating_sub(uncommitted_tail_budget);
634 }
635 MatcherBackend::HashChain => {
636 let matcher = self.hc_matcher_mut();
637 matcher.max_window_size = matcher
638 .max_window_size
639 .saturating_sub(uncommitted_tail_budget);
640 }
641 }
642 }
643 if committed_dict_budget > 0 {
644 self.dictionary_retained_budget = self
645 .dictionary_retained_budget
646 .saturating_add(committed_dict_budget);
647 }
648 }
649
650 fn window_size(&self) -> u64 {
651 self.reported_window_size as u64
652 }
653
654 fn get_next_space(&mut self) -> Vec<u8> {
655 if let Some(mut space) = self.vec_pool.pop() {
656 if space.len() > self.slice_size {
657 space.truncate(self.slice_size);
658 }
659 if space.len() < self.slice_size {
660 space.resize(self.slice_size, 0);
661 }
662 return space;
663 }
664 alloc::vec![0; self.slice_size]
665 }
666
667 fn get_last_space(&mut self) -> &[u8] {
668 match self.active_backend {
669 MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
670 MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
671 MatcherBackend::Row => self.row_matcher().get_last_space(),
672 MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
673 }
674 }
675
676 fn commit_space(&mut self, space: Vec<u8>) {
677 match self.active_backend {
678 MatcherBackend::Simple => {
679 let vec_pool = &mut self.vec_pool;
680 let mut evicted_bytes = 0usize;
681 let suffixes = match self.suffix_pool.pop() {
682 Some(store) if store.slots.len() >= space.len() => store,
683 _ => SuffixStore::with_capacity(space.len()),
684 };
685 let suffix_pool = &mut self.suffix_pool;
686 self.match_generator
687 .add_data(space, suffixes, |mut data, mut suffixes| {
688 evicted_bytes += data.len();
689 data.resize(data.capacity(), 0);
690 vec_pool.push(data);
691 suffixes.slots.clear();
692 suffixes.slots.resize(suffixes.slots.capacity(), None);
693 suffix_pool.push(suffixes);
694 });
695 self.retire_dictionary_budget(evicted_bytes);
696 self.trim_after_budget_retire();
697 }
698 MatcherBackend::Dfast => {
699 let vec_pool = &mut self.vec_pool;
700 let mut evicted_bytes = 0usize;
701 self.dfast_match_generator
702 .as_mut()
703 .expect("dfast backend must be initialized by reset() before use")
704 .add_data(space, |mut data| {
705 evicted_bytes += data.len();
706 data.resize(data.capacity(), 0);
707 vec_pool.push(data);
708 });
709 self.retire_dictionary_budget(evicted_bytes);
710 self.trim_after_budget_retire();
711 }
712 MatcherBackend::Row => {
713 let vec_pool = &mut self.vec_pool;
714 let mut evicted_bytes = 0usize;
715 self.row_match_generator
716 .as_mut()
717 .expect("row backend must be initialized by reset() before use")
718 .add_data(space, |mut data| {
719 evicted_bytes += data.len();
720 data.resize(data.capacity(), 0);
721 vec_pool.push(data);
722 });
723 self.retire_dictionary_budget(evicted_bytes);
724 self.trim_after_budget_retire();
725 }
726 MatcherBackend::HashChain => {
727 let vec_pool = &mut self.vec_pool;
728 let mut evicted_bytes = 0usize;
729 self.hc_match_generator
730 .as_mut()
731 .expect("hash chain backend must be initialized by reset() before use")
732 .add_data(space, |mut data| {
733 evicted_bytes += data.len();
734 data.resize(data.capacity(), 0);
735 vec_pool.push(data);
736 });
737 self.retire_dictionary_budget(evicted_bytes);
738 self.trim_after_budget_retire();
739 }
740 }
741 }
742
743 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
744 match self.active_backend {
745 MatcherBackend::Simple => {
746 while self.match_generator.next_sequence(&mut handle_sequence) {}
747 }
748 MatcherBackend::Dfast => self
749 .dfast_matcher_mut()
750 .start_matching(&mut handle_sequence),
751 MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
752 MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
753 }
754 }
755 fn skip_matching(&mut self) {
756 match self.active_backend {
757 MatcherBackend::Simple => self.match_generator.skip_matching(),
758 MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
759 MatcherBackend::Row => self.row_matcher_mut().skip_matching(),
760 MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
761 }
762 }
763}
764
/// Single-slot hash map from a 5-byte suffix to the position where that
/// suffix starts. Positions are stored as `index + 1` in a `NonZeroUsize` so
/// `Option<NonZeroUsize>` occupies no more space than the bare integer.
struct SuffixStore {
    slots: Vec<Option<NonZeroUsize>>,
    // floor(log2(slots.len())); used to take the top hash bits as the slot
    // index. Zero means a single-slot table.
    len_log: u32,
}

impl SuffixStore {
    /// Allocates `capacity` empty slots. `capacity` must be non-zero
    /// (`ilog2` panics on zero).
    fn with_capacity(capacity: usize) -> Self {
        Self {
            len_log: capacity.ilog2(),
            slots: alloc::vec![None; capacity],
        }
    }

    /// Records `idx` as the position of `suffix`, overwriting whatever was
    /// previously stored in the colliding slot.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let slot = self.key(suffix);
        // idx + 1 can only be zero on usize overflow, which would panic in
        // debug builds before reaching the unwrap.
        let stored = NonZeroUsize::new(idx + 1).unwrap();
        self.slots[slot] = Some(stored);
    }

    /// True if the slot this suffix hashes to is occupied (possibly by a
    /// colliding suffix).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        self.slots[self.key(suffix)].is_some()
    }

    /// Position recorded for this suffix's slot, if any.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        self.slots[self.key(suffix)].map(|stored| stored.get() - 1)
    }

    /// Hashes the first five bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        if self.len_log == 0 {
            // A single-slot table: the shift below would be by 64 bits, so
            // map everything to slot 0 directly.
            return 0;
        }

        const POLY: u64 = 0xCF3BCCDCABu64;

        // Mix the five key bytes by shifting each into a distinct high byte
        // lane and multiplying by the polynomial, then XOR-folding.
        let mut mixed = 0u64;
        for (i, &byte) in suffix[..5].iter().enumerate() {
            mixed ^= ((byte as u64) << (24 + 8 * i)).wrapping_mul(POLY);
        }

        // Keep the top len_log bits; the modulo is a safety net for table
        // sizes that are not exact powers of two.
        let index = mixed >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
827
/// One slice of the simple matcher's window plus its suffix index.
struct WindowEntry {
    data: Vec<u8>,
    // Suffix hash table covering `data`.
    suffixes: SuffixStore,
    // Distance (in bytes) from the end of this entry to the end of the most
    // recent entry; used to turn an intra-entry index into a match offset.
    base_offset: usize,
}
837
/// The "simple" matcher: a sliding window of slices, each with a single-slot
/// suffix table, scanned greedily for matches of at least `MIN_MATCH_LEN`.
pub(crate) struct MatchGenerator {
    max_window_size: usize,
    // Oldest-to-newest slices currently in the window.
    window: Vec<WindowEntry>,
    // Total bytes across all window entries.
    window_size: usize,
    // Debug-only concatenation of the whole window, used to cross-check that
    // reported matches really do occur at the reported offsets.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    // Scan cursor within the newest entry.
    suffix_idx: usize,
    // End of the last emitted sequence within the newest entry; bytes in
    // [last_idx_in_sequence, suffix_idx) are pending literals.
    last_idx_in_sequence: usize,
    // Stride for seeding the suffix table while skipping over match bodies.
    hash_fill_step: usize,
    // Repeat-offset history fed to `encode_offset_with_history`.
    offset_hist: [u32; 3],
}
853
854impl MatchGenerator {
    /// Index of the first differing byte within a XORed machine word.
    ///
    /// Little-endian: the lowest-addressed byte sits in the least significant
    /// bits, so count trailing zero bits.
    #[inline(always)]
    #[cfg(target_endian = "little")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.trailing_zeros() as usize / 8
    }

    /// Big-endian counterpart: the lowest-addressed byte occupies the most
    /// significant bits, so count leading zero bits.
    #[inline(always)]
    #[cfg(target_endian = "big")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.leading_zeros() as usize / 8
    }
866
867 fn new(max_size: usize) -> Self {
869 Self {
870 max_window_size: max_size,
871 window: Vec::new(),
872 window_size: 0,
873 #[cfg(debug_assertions)]
874 concat_window: Vec::new(),
875 suffix_idx: 0,
876 last_idx_in_sequence: 0,
877 hash_fill_step: 1,
878 offset_hist: [1, 4, 8],
879 }
880 }
881
882 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
883 self.window_size = 0;
884 #[cfg(debug_assertions)]
885 self.concat_window.clear();
886 self.suffix_idx = 0;
887 self.last_idx_in_sequence = 0;
888 self.offset_hist = [1, 4, 8];
889 self.window.drain(..).for_each(|entry| {
890 reuse_space(entry.data, entry.suffixes);
891 });
892 }
893
    /// Advances the scan over the newest window entry and emits at most one
    /// sequence (a literals run or a literals+match triple) via
    /// `handle_sequence`. Returns `false` once the entry is exhausted and no
    /// literals remain.
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // Cursor past the end: flush any trailing literals, then signal
            // exhaustion.
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            let data_slice = &data_slice[self.suffix_idx..];
            // Too few bytes left to form a match key: emit the rest of the
            // entry as one literal run.
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Try repeat offsets first (only valid when there are no pending
            // literals), then probe every window entry's suffix table and
            // keep the longest match, breaking ties by smaller offset.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // The suffix table only hashes the key, so collisions can
                    // surface here with a shorter (or zero) real prefix.
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // Debug cross-check: the bytes at `offset` back in
                        // the concatenated window must equal the match.
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // Index the suffixes inside the matched region before the
                // cursor jumps over it.
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // Re-borrow: add_suffixes_till took &mut self.
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repeat-offset history in sync; the encoded value
                // itself is not needed here.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match: record this suffix (first writer wins) and advance
            // one byte; the pending literal run grows implicitly.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
1001
    /// Length of the longest common prefix of `a` and `b`, compared a machine
    /// word at a time with a byte-wise tail.
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let chunk = core::mem::size_of::<usize>();
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        // Word loop: XOR of equal words is zero; a non-zero XOR pinpoints the
        // first differing byte via the endian-aware mismatch_byte_index.
        while off + chunk <= max {
            // SAFETY: off + chunk <= max <= len of both slices, so both reads
            // stay in bounds; read_unaligned imposes no alignment requirement.
            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
            let diff = lhs_word ^ rhs_word;
            if diff != 0 {
                return off + Self::mismatch_byte_index(diff);
            }
            off += chunk;
        }

        // Fewer than `chunk` bytes remain: finish byte by byte.
        off + core::iter::zip(&a[off..max], &b[off..max])
            .take_while(|(x, y)| x == y)
            .count()
    }
1025
    /// Seeds the newest entry's suffix table for positions in
    /// `[suffix_idx, idx)` (stepping by `fill_step`), called before the scan
    /// cursor jumps over a match body. Positions within `MIN_MATCH_LEN - 1`
    /// of `idx` are skipped except for the final full suffix ending exactly
    /// at `idx`, which is always inserted.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Last position where a full MIN_MATCH_LEN suffix fits before `idx`.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                // Stride matches the unrolled fast path.
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Always index the suffix that ends exactly at `idx`, so the byte
        // right before the resume point is findable regardless of stride.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
1057
1058 #[inline(always)]
1059 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1060 debug_assert!(
1061 pos + MIN_MATCH_LEN <= data.len(),
1062 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1063 pos,
1064 MIN_MATCH_LEN,
1065 data.len()
1066 );
1067 let key = &data[pos..pos + MIN_MATCH_LEN];
1068 if !suffixes.contains_key(key) {
1069 suffixes.insert(key, pos);
1070 }
1071 }
1072
1073 #[inline(always)]
1074 fn add_suffixes_interleaved_fast(
1075 data: &[u8],
1076 suffixes: &mut SuffixStore,
1077 start: usize,
1078 insert_limit: usize,
1079 ) {
1080 let lane = FAST_HASH_FILL_STEP;
1081 let mut pos = start;
1082
1083 while pos + lane * 3 < insert_limit {
1086 let p0 = pos;
1087 let p1 = pos + lane;
1088 let p2 = pos + lane * 2;
1089 let p3 = pos + lane * 3;
1090
1091 Self::insert_suffix_if_absent(data, suffixes, p0);
1092 Self::insert_suffix_if_absent(data, suffixes, p1);
1093 Self::insert_suffix_if_absent(data, suffixes, p2);
1094 Self::insert_suffix_if_absent(data, suffixes, p3);
1095
1096 pos += lane * 4;
1097 }
1098
1099 while pos < insert_limit {
1100 Self::insert_suffix_if_absent(data, suffixes, pos);
1101 pos += lane;
1102 }
1103 }
1104
1105 fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
1106 if literals_len != 0 {
1107 return None;
1108 }
1109
1110 let reps = [
1111 Some(self.offset_hist[1] as usize),
1112 Some(self.offset_hist[2] as usize),
1113 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1114 ];
1115
1116 let mut best: Option<(usize, usize)> = None;
1117 let mut seen = [0usize; 3];
1118 let mut seen_len = 0usize;
1119 for offset in reps.into_iter().flatten() {
1120 if offset == 0 {
1121 continue;
1122 }
1123 if seen[..seen_len].contains(&offset) {
1124 continue;
1125 }
1126 seen[seen_len] = offset;
1127 seen_len += 1;
1128
1129 let Some(match_len) = self.offset_match_len(offset, data_slice) else {
1130 continue;
1131 };
1132 if match_len < MIN_MATCH_LEN {
1133 continue;
1134 }
1135
1136 if best.is_none_or(|(old_offset, old_len)| {
1137 match_len > old_len || (match_len == old_len && offset < old_offset)
1138 }) {
1139 best = Some((offset, match_len));
1140 }
1141 }
1142 best
1143 }
1144
    /// Resolves `offset` (distance back from the current position) to a window
    /// position and returns the common-prefix length against `data_slice`, or
    /// `None` when the offset reaches outside the searchable history.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // History bytes preceding the still-unmatched tail of the last entry.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Walk backwards through window entries until `offset` lands inside one.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }
1179
    /// Indexes the whole current entry without emitting sequences, then
    /// advances the matcher state past it.
    fn skip_matching(&mut self) {
        let len = self.window.last().unwrap().data.len();
        // Must run before suffix_idx is advanced: it fills from the old value.
        self.add_suffixes_till(len, 1);
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }
1187
    /// Appends `data` (with its prebuilt suffix table) as a new window entry,
    /// evicting old entries as needed via `reuse_space`.
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        // The previous entry must be fully consumed (or none exists yet).
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        // Shift every existing entry's base offset by the previous entry's
        // length; the new entry starts at base_offset 0.
        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            base_offset: 0,
        });
        self.window_size += len;
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }
1219
    /// Evicts oldest window entries until `amount` more bytes fit within
    /// `max_window_size`, handing freed buffers to `reuse_space`.
    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        // Guarantees the eviction loop below terminates.
        assert!(self.max_window_size >= amount);
        while self.window_size + amount > self.max_window_size {
            // NOTE(review): Vec::remove(0) is O(entries); presumably the entry
            // count stays small — confirm before switching to a VecDeque.
            let removed = self.window.remove(0);
            self.window_size -= removed.data.len();
            #[cfg(debug_assertions)]
            self.concat_window.drain(0..removed.data.len());

            let WindowEntry {
                suffixes,
                data: leaked_vec,
                base_offset: _,
            } = removed;
            reuse_space(leaked_vec, suffixes);
        }
    }
1238}
1239
/// Greedy/lazy matcher with two hash tables (4-byte and 8-byte keys) over a
/// contiguous history buffer, in the style of zstd's double-fast strategy.
struct DfastMatchGenerator {
    max_window_size: usize,
    // Raw input buffers currently inside the window, oldest first.
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    // Contiguous copy of the window contents used for matching.
    history: Vec<u8>,
    // Offset of the live region inside `history` (dead prefix not yet drained).
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // Recent match offsets used for repcode candidates.
    offset_hist: [u32; 3],
    // Buckets of absolute positions keyed by 4-byte and 8-byte hashes.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    hash_bits: usize,
    lazy_depth: u8,
}
1256
/// A match starting at absolute position `start`, reaching back `offset`
/// bytes into history for `match_len` bytes.
#[derive(Copy, Clone)]
struct MatchCandidate {
    // May precede the probed position after backward extension over literals.
    start: usize,
    offset: usize,
    match_len: usize,
}
1263
1264fn best_len_offset_candidate(
1265 lhs: Option<MatchCandidate>,
1266 rhs: Option<MatchCandidate>,
1267) -> Option<MatchCandidate> {
1268 match (lhs, rhs) {
1269 (None, other) | (other, None) => other,
1270 (Some(lhs), Some(rhs)) => {
1271 if rhs.match_len > lhs.match_len
1272 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1273 {
1274 Some(rhs)
1275 } else {
1276 Some(lhs)
1277 }
1278 }
1279 }
1280}
1281
1282fn extend_backwards_shared(
1283 concat: &[u8],
1284 history_abs_start: usize,
1285 mut candidate_pos: usize,
1286 mut abs_pos: usize,
1287 mut match_len: usize,
1288 lit_len: usize,
1289) -> MatchCandidate {
1290 let min_abs_pos = abs_pos - lit_len;
1291 while abs_pos > min_abs_pos
1292 && candidate_pos > history_abs_start
1293 && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1294 {
1295 candidate_pos -= 1;
1296 abs_pos -= 1;
1297 match_len += 1;
1298 }
1299 MatchCandidate {
1300 start: abs_pos,
1301 offset: abs_pos - candidate_pos,
1302 match_len,
1303 }
1304}
1305
/// Probes the repcode offsets at `abs_pos` and returns the best match of at
/// least `min_match_len`, extended backwards over pending literals.
///
/// Slot selection mirrors `MatchGenerator::repcode_candidate`: with no pending
/// literals the first slot is skipped in favor of slots 1, 2 and `rep0 - 1`;
/// otherwise the three stored offsets are tried as-is.
fn repcode_candidate_shared(
    concat: &[u8],
    history_abs_start: usize,
    offset_hist: [u32; 3],
    abs_pos: usize,
    lit_len: usize,
    min_match_len: usize,
) -> Option<MatchCandidate> {
    let reps = if lit_len == 0 {
        [
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
        ]
    } else {
        [
            Some(offset_hist[0] as usize),
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
        ]
    };

    let current_idx = abs_pos - history_abs_start;
    // Not enough bytes left to reach the minimum match length.
    if current_idx + min_match_len > concat.len() {
        return None;
    }

    let mut best = None;
    for rep in reps.into_iter().flatten() {
        // Reject offsets reaching before the absolute stream start.
        if rep == 0 || rep > abs_pos {
            continue;
        }
        let candidate_pos = abs_pos - rep;
        // Reject offsets reaching before the live history window.
        if candidate_pos < history_abs_start {
            continue;
        }
        let candidate_idx = candidate_pos - history_abs_start;
        let match_len =
            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
        if match_len >= min_match_len {
            let candidate = extend_backwards_shared(
                concat,
                history_abs_start,
                candidate_pos,
                abs_pos,
                match_len,
                lit_len,
            );
            best = best_len_offset_candidate(best, Some(candidate));
        }
    }
    best
}
1359
/// Parameters for `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    // Accept immediately once a match reaches this length.
    target_len: usize,
    min_match_len: usize,
    // A second look-ahead probe is made only when this is >= 2.
    lazy_depth: u8,
    // Absolute end of history; probes that would pass it are skipped.
    history_abs_end: usize,
}
1367
1368fn pick_lazy_match_shared(
1369 abs_pos: usize,
1370 lit_len: usize,
1371 best: Option<MatchCandidate>,
1372 config: LazyMatchConfig,
1373 mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1374) -> Option<MatchCandidate> {
1375 let best = best?;
1376 if best.match_len >= config.target_len
1377 || abs_pos + 1 + config.min_match_len > config.history_abs_end
1378 {
1379 return Some(best);
1380 }
1381
1382 let next = best_match_at(abs_pos + 1, lit_len + 1);
1383 if let Some(next) = next
1384 && (next.match_len > best.match_len
1385 || (next.match_len == best.match_len && next.offset < best.offset))
1386 {
1387 return None;
1388 }
1389
1390 if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1391 let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1392 if let Some(next2) = next2
1393 && next2.match_len > best.match_len + 1
1394 {
1395 return None;
1396 }
1397 }
1398
1399 Some(best)
1400}
1401
1402impl DfastMatchGenerator {
    /// Creates an empty generator; the hash tables are allocated lazily on
    /// first use (see `ensure_hash_tables`).
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Zstandard's initial repeat-offset values (RFC 8878).
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            lazy_depth: 1,
        }
    }
1418
1419 fn set_hash_bits(&mut self, bits: usize) {
1420 let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1421 if self.hash_bits != clamped {
1422 self.hash_bits = clamped;
1423 self.short_hash = Vec::new();
1424 self.long_hash = Vec::new();
1425 }
1426 }
1427
    /// Clears all matcher state; the hash tables are wiped in place and the
    /// window buffers are recycled via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        // Keep the allocations, just reset the entries.
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // Hand the buffer back at full capacity so it can be reused whole.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
1443
    /// Borrows the most recently added window buffer.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
1447
    /// Appends `data` to the window and history, evicting oldest buffers (via
    /// `reuse_space`) until the new total fits in `max_window_size`.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Guarantees the eviction loop terminates.
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Advance the live-history window past the evicted bytes.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
1462
1463 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1464 while self.window_size > self.max_window_size {
1465 let removed = self.window.pop_front().unwrap();
1466 self.window_size -= removed.len();
1467 self.history_start += removed.len();
1468 self.history_abs_start += removed.len();
1469 reuse_space(removed);
1470 }
1471 }
1472
    /// Indexes the newest buffer into the hash tables without emitting any
    /// sequences.
    fn skip_matching(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }
1479
    /// Runs the match loop over the newest buffer, emitting `Sequence`s through
    /// `handle_sequence` and updating the repcode history after each match.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position the match covers before moving on.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may precede abs_pos after backward extension.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with the emitted offset.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Flush any trailing bytes as a literal-only sequence.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
1535
1536 fn ensure_hash_tables(&mut self) {
1537 let table_len = 1usize << self.hash_bits;
1538 if self.short_hash.len() != table_len {
1539 self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1543 self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1544 }
1545 }
1546
1547 fn compact_history(&mut self) {
1548 if self.history_start == 0 {
1549 return;
1550 }
1551 if self.history_start >= self.max_window_size
1552 || self.history_start * 2 >= self.history.len()
1553 {
1554 self.history.drain(..self.history_start);
1555 self.history_start = 0;
1556 }
1557 }
1558
    /// The still-valid region of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
1562
    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
1566
1567 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1568 let rep = self.repcode_candidate(abs_pos, lit_len);
1569 let hash = self.hash_candidate(abs_pos, lit_len);
1570 best_len_offset_candidate(rep, hash)
1571 }
1572
1573 fn pick_lazy_match(
1574 &self,
1575 abs_pos: usize,
1576 lit_len: usize,
1577 best: Option<MatchCandidate>,
1578 ) -> Option<MatchCandidate> {
1579 pick_lazy_match_shared(
1580 abs_pos,
1581 lit_len,
1582 best,
1583 LazyMatchConfig {
1584 target_len: DFAST_TARGET_LEN,
1585 min_match_len: DFAST_MIN_MATCH_LEN,
1586 lazy_depth: self.lazy_depth,
1587 history_abs_end: self.history_abs_end(),
1588 },
1589 |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
1590 )
1591 }
1592
1593 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1594 repcode_candidate_shared(
1595 self.live_history(),
1596 self.history_abs_start,
1597 self.offset_hist,
1598 abs_pos,
1599 lit_len,
1600 DFAST_MIN_MATCH_LEN,
1601 )
1602 }
1603
1604 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1605 let concat = self.live_history();
1606 let current_idx = abs_pos - self.history_abs_start;
1607 let mut best = None;
1608 for candidate_pos in self.long_candidates(abs_pos) {
1609 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1610 continue;
1611 }
1612 let candidate_idx = candidate_pos - self.history_abs_start;
1613 let match_len =
1614 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1615 if match_len >= DFAST_MIN_MATCH_LEN {
1616 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1617 best = best_len_offset_candidate(best, Some(candidate));
1618 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1619 return best;
1620 }
1621 }
1622 }
1623
1624 for candidate_pos in self.short_candidates(abs_pos) {
1625 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1626 continue;
1627 }
1628 let candidate_idx = candidate_pos - self.history_abs_start;
1629 let match_len =
1630 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1631 if match_len >= DFAST_MIN_MATCH_LEN {
1632 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1633 best = best_len_offset_candidate(best, Some(candidate));
1634 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1635 return best;
1636 }
1637 }
1638 }
1639 best
1640 }
1641
1642 fn extend_backwards(
1643 &self,
1644 candidate_pos: usize,
1645 abs_pos: usize,
1646 match_len: usize,
1647 lit_len: usize,
1648 ) -> MatchCandidate {
1649 extend_backwards_shared(
1650 self.live_history(),
1651 self.history_abs_start,
1652 candidate_pos,
1653 abs_pos,
1654 match_len,
1655 lit_len,
1656 )
1657 }
1658
1659 fn insert_positions(&mut self, start: usize, end: usize) {
1660 for pos in start..end {
1661 self.insert_position(pos);
1662 }
1663 }
1664
1665 fn insert_position(&mut self, pos: usize) {
1666 let idx = pos - self.history_abs_start;
1667 let short = {
1668 let concat = self.live_history();
1669 (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
1670 };
1671 if let Some(short) = short {
1672 let bucket = &mut self.short_hash[short];
1673 if bucket[0] != pos {
1674 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1675 bucket[0] = pos;
1676 }
1677 }
1678
1679 let long = {
1680 let concat = self.live_history();
1681 (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
1682 };
1683 if let Some(long) = long {
1684 let bucket = &mut self.long_hash[long];
1685 if bucket[0] != pos {
1686 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1687 bucket[0] = pos;
1688 }
1689 }
1690 }
1691
    /// Iterates stored 4-byte-hash candidates for `pos`, most recent first;
    /// empty when fewer than 4 bytes remain at `pos`.
    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 4 <= concat.len())
            .then(|| self.short_hash[self.hash4(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }
1701
    /// Iterates stored 8-byte-hash candidates for `pos`, most recent first;
    /// empty when fewer than 8 bytes remain at `pos`.
    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 8 <= concat.len())
            .then(|| self.long_hash[self.hash8(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }
1711
    /// Table index for the 4 little-endian bytes at the start of `data`.
    fn hash4(&self, data: &[u8]) -> usize {
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        self.hash_index(value)
    }
1716
    /// Table index for the 8 little-endian bytes at the start of `data`.
    fn hash8(&self, data: &[u8]) -> usize {
        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
        self.hash_index(value)
    }
1721
    /// Multiplicative hash keeping the top `hash_bits` bits.
    fn hash_index(&self, value: u64) -> usize {
        // Odd 64-bit constant (also used by xxHash); the multiply spreads
        // entropy into the high bits, which the shift then selects.
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
    }
1726}
1727
/// Row-based matcher: hash buckets ("rows") of fixed size with a per-row ring
/// cursor and one-byte tags for cheap slot filtering, in the style of zstd's
/// row-hash strategy.
struct RowMatchGenerator {
    max_window_size: usize,
    // Raw input buffers currently inside the window, oldest first.
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    // Contiguous copy of the window contents used for matching.
    history: Vec<u8>,
    // Offset of the live region inside `history`.
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // Recent match offsets used for repcode candidates.
    offset_hist: [u32; 3],
    // log2(number of rows); total hash bits = row_hash_log + row_log.
    row_hash_log: usize,
    // log2(entries per row).
    row_log: usize,
    search_depth: usize,
    target_len: usize,
    lazy_depth: u8,
    // Per-row ring cursor: index of the newest slot in each row.
    row_heads: Vec<u8>,
    // Flattened rows of absolute positions (ROW_EMPTY_SLOT when unused).
    row_positions: Vec<usize>,
    // One tag byte per slot, filtering non-matching slots before lookup.
    row_tags: Vec<u8>,
}
1745
1746impl RowMatchGenerator {
    /// Creates an empty row matcher with the default geometry; the tables are
    /// allocated lazily by `ensure_tables`.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Zstandard's initial repeat-offset values (RFC 8878).
            offset_hist: [1, 4, 8],
            // Row count is the hash space divided by the row size.
            row_hash_log: ROW_HASH_BITS - ROW_LOG,
            row_log: ROW_LOG,
            search_depth: ROW_SEARCH_DEPTH,
            target_len: ROW_TARGET_LEN,
            lazy_depth: 1,
            row_heads: Vec::new(),
            row_positions: Vec::new(),
            row_tags: Vec::new(),
        }
    }
1766
1767 fn set_hash_bits(&mut self, bits: usize) {
1768 let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
1769 let row_hash_log = clamped.saturating_sub(self.row_log);
1770 if self.row_hash_log != row_hash_log {
1771 self.row_hash_log = row_hash_log;
1772 self.row_heads.clear();
1773 self.row_positions.clear();
1774 self.row_tags.clear();
1775 }
1776 }
1777
    /// Applies a row configuration. `row_log` must be assigned before
    /// `set_hash_bits`, which derives the row count from it.
    fn configure(&mut self, config: RowConfig) {
        self.row_log = config.row_log.clamp(4, 6);
        self.search_depth = config.search_depth;
        self.target_len = config.target_len;
        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
    }
1784
    /// Clears all matcher state, wiping (but keeping) the row tables and
    /// recycling window buffers via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        // No-ops when the tables were never allocated.
        self.row_heads.fill(0);
        self.row_positions.fill(ROW_EMPTY_SLOT);
        self.row_tags.fill(0);
        for mut data in self.window.drain(..) {
            // Hand the buffer back at full capacity so it can be reused whole.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
1799
    /// Borrows the most recently added window buffer.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
1803
    /// Appends `data` to the window and history, evicting oldest buffers (via
    /// `reuse_space`) until the new total fits in `max_window_size`.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Guarantees the eviction loop terminates.
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Advance the live-history window past the evicted bytes.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
1818
1819 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1820 while self.window_size > self.max_window_size {
1821 let removed = self.window.pop_front().unwrap();
1822 self.window_size -= removed.len();
1823 self.history_start += removed.len();
1824 self.history_abs_start += removed.len();
1825 reuse_space(removed);
1826 }
1827 }
1828
    /// Indexes the newest buffer (plus boundary backfill) without emitting any
    /// sequences.
    fn skip_matching(&mut self) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        // Positions just before the boundary only gain complete keys once the
        // new data is appended; index them first.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }
1839
    /// Runs the match loop over the newest buffer, emitting `Sequence`s and
    /// keeping the row tables and repcode history up to date.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        // Index boundary-straddling positions from the previous buffer first.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + ROW_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position the match covers before moving on.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may precede abs_pos after backward extension.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with the emitted offset.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index the remaining tail so future buffers can match into it.
        while pos + ROW_HASH_KEY_LEN <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        // Flush any trailing bytes as a literal-only sequence.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
1895
1896 fn ensure_tables(&mut self) {
1897 let row_count = 1usize << self.row_hash_log;
1898 let row_entries = 1usize << self.row_log;
1899 let total = row_count * row_entries;
1900 if self.row_positions.len() != total {
1901 self.row_heads = alloc::vec![0; row_count];
1902 self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
1903 self.row_tags = alloc::vec![0; total];
1904 }
1905 }
1906
1907 fn compact_history(&mut self) {
1908 if self.history_start == 0 {
1909 return;
1910 }
1911 if self.history_start >= self.max_window_size
1912 || self.history_start * 2 >= self.history.len()
1913 {
1914 self.history.drain(..self.history_start);
1915 self.history_start = 0;
1916 }
1917 }
1918
    /// The still-valid region of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
1922
    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
1926
    /// Hashes the 4 bytes at `abs_pos`, splitting the top hash bits into a row
    /// index and an 8-bit tag; `None` when fewer than 4 bytes remain.
    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + ROW_HASH_KEY_LEN > concat.len() {
            return None;
        }
        let value =
            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
        // Same multiplicative hash constant as the dfast matcher.
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        let hash = value.wrapping_mul(PRIME);
        // Keep the top row_hash_log + ROW_TAG_BITS bits: the high part selects
        // the row, the low 8 bits become the slot tag.
        let total_bits = self.row_hash_log + ROW_TAG_BITS;
        let combined = hash >> (u64::BITS as usize - total_bits);
        let row_mask = (1usize << self.row_hash_log) - 1;
        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
        let tag = combined as u8;
        Some((row, tag))
    }
1944
    /// First position to (re)index before the newest buffer: up to
    /// ROW_HASH_KEY_LEN - 1 bytes back, clamped to live history, so keys that
    /// straddle the buffer boundary are covered.
    fn backfill_start(&self, current_abs_start: usize) -> usize {
        current_abs_start
            .saturating_sub(ROW_HASH_KEY_LEN - 1)
            .max(self.history_abs_start)
    }
1950
1951 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1952 let rep = self.repcode_candidate(abs_pos, lit_len);
1953 let row = self.row_candidate(abs_pos, lit_len);
1954 best_len_offset_candidate(rep, row)
1955 }
1956
1957 fn pick_lazy_match(
1958 &self,
1959 abs_pos: usize,
1960 lit_len: usize,
1961 best: Option<MatchCandidate>,
1962 ) -> Option<MatchCandidate> {
1963 pick_lazy_match_shared(
1964 abs_pos,
1965 lit_len,
1966 best,
1967 LazyMatchConfig {
1968 target_len: self.target_len,
1969 min_match_len: ROW_MIN_MATCH_LEN,
1970 lazy_depth: self.lazy_depth,
1971 history_abs_end: self.history_abs_end(),
1972 },
1973 |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
1974 )
1975 }
1976
1977 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1978 repcode_candidate_shared(
1979 self.live_history(),
1980 self.history_abs_start,
1981 self.offset_hist,
1982 abs_pos,
1983 lit_len,
1984 ROW_MIN_MATCH_LEN,
1985 )
1986 }
1987
    /// Scans the row selected by the 4-byte hash at `abs_pos`, newest slots
    /// first, and returns the best match of at least ROW_MIN_MATCH_LEN
    /// (early exit once `target_len` is reached).
    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let (row, tag) = self.hash_and_row(abs_pos)?;
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let max_walk = self.search_depth.min(row_entries);

        let mut best = None;
        // Walk forward from the head cursor: slots are ordered newest first.
        for i in 0..max_walk {
            let slot = (head + i) & row_mask;
            let idx = row_base + slot;
            // Tag mismatch filters most non-matching slots before touching
            // the positions array.
            if self.row_tags[idx] != tag {
                continue;
            }
            let candidate_pos = self.row_positions[idx];
            // Skip empty slots and positions outside live history or at/after
            // the probe.
            if candidate_pos == ROW_EMPTY_SLOT
                || candidate_pos < self.history_abs_start
                || candidate_pos >= abs_pos
            {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= ROW_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                // Good enough: stop searching.
                if best.is_some_and(|best| best.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
2029
2030 fn extend_backwards(
2031 &self,
2032 candidate_pos: usize,
2033 abs_pos: usize,
2034 match_len: usize,
2035 lit_len: usize,
2036 ) -> MatchCandidate {
2037 extend_backwards_shared(
2038 self.live_history(),
2039 self.history_abs_start,
2040 candidate_pos,
2041 abs_pos,
2042 match_len,
2043 lit_len,
2044 )
2045 }
2046
2047 fn insert_positions(&mut self, start: usize, end: usize) {
2048 for pos in start..end {
2049 self.insert_position(pos);
2050 }
2051 }
2052
    /// Inserts `abs_pos` into its row: the ring cursor moves back one slot and
    /// the new entry overwrites the oldest slot in the row.
    fn insert_position(&mut self, abs_pos: usize) {
        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
            return;
        };
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        // Decrement modulo the row size; the slot at `next` becomes newest.
        let next = head.wrapping_sub(1) & row_mask;
        self.row_heads[row] = next as u8;
        self.row_tags[row_base + next] = tag;
        self.row_positions[row_base + next] = abs_pos;
    }
2066}
2067
/// Hash-chain matcher: a head table keyed by hash plus a chain table linking
/// earlier positions that share the same hash.
struct HcMatchGenerator {
    max_window_size: usize,
    // Raw input buffers currently inside the window, oldest first.
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    // Contiguous copy of the window contents used for matching.
    history: Vec<u8>,
    // Offset of the live region inside `history`.
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // NOTE(review): presumably the bias used to pack absolute positions into
    // the u32 tables — confirm against the insert/lookup code (not visible
    // in this chunk).
    position_base: usize,
    // Recent match offsets used for repcode candidates.
    offset_hist: [u32; 3],
    // Head per hash bucket; HC_EMPTY (0) marks an unused entry.
    hash_table: Vec<u32>,
    // Previous position with the same hash, forming the chain.
    chain_table: Vec<u32>,
    lazy_depth: u8,
    hash_log: usize,
    chain_log: usize,
    search_depth: usize,
    target_len: usize,
}
2085
2086impl HcMatchGenerator {
    /// Creates an empty hash-chain matcher with the default HC parameters;
    /// the tables are allocated lazily.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            position_base: 0,
            // Zstandard's initial repeat-offset values (RFC 8878).
            offset_hist: [1, 4, 8],
            hash_table: Vec::new(),
            chain_table: Vec::new(),
            lazy_depth: 2,
            hash_log: HC_HASH_LOG,
            chain_log: HC_CHAIN_LOG,
            search_depth: HC_SEARCH_DEPTH,
            target_len: HC_TARGET_LEN,
        }
    }
2106
    /// Applies an HC configuration; when the table geometry changed, the
    /// tables are emptied so they get re-allocated at the new sizes.
    fn configure(&mut self, config: HcConfig) {
        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
        self.hash_log = config.hash_log;
        self.chain_log = config.chain_log;
        // Hard cap keeps chain walks bounded regardless of configuration.
        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
        self.target_len = config.target_len;
        if resize && !self.hash_table.is_empty() {
            self.hash_table.clear();
            self.chain_table.clear();
        }
    }
2119
    /// Clears all matcher state; the tables are wiped in place (HC_EMPTY) and
    /// the window buffers are recycled via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.position_base = 0;
        self.offset_hist = [1, 4, 8];
        if !self.hash_table.is_empty() {
            self.hash_table.fill(HC_EMPTY);
            self.chain_table.fill(HC_EMPTY);
        }
        for mut data in self.window.drain(..) {
            // Hand the buffer back at full capacity so it can be reused whole.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
2136
    /// Borrows the most recently added window buffer.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
2140
    /// Appends `data` to the window and history, evicting oldest buffers (via
    /// `reuse_space`) until the new total fits in `max_window_size`.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Guarantees the eviction loop terminates.
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Advance the live-history window past the evicted bytes.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
2158
2159 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2160 while self.window_size > self.max_window_size {
2161 let removed = self.window.pop_front().unwrap();
2162 self.window_size -= removed.len();
2163 self.history_start += removed.len();
2164 self.history_abs_start += removed.len();
2165 reuse_space(removed);
2166 }
2167 }
2168
2169 fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2172 let backfill_start = current_abs_start
2173 .saturating_sub(3)
2174 .max(self.history_abs_start);
2175 if backfill_start < current_abs_start {
2176 self.insert_positions(backfill_start, current_abs_start);
2177 }
2178 }
2179
2180 fn skip_matching(&mut self) {
2181 self.ensure_tables();
2182 let current_len = self.window.back().unwrap().len();
2183 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2184 self.backfill_boundary_positions(current_abs_start);
2185 self.insert_positions(current_abs_start, current_abs_start + current_len);
2186 }
2187
    /// Runs hash-chain matching over the most recent block, emitting
    /// `Sequence`s through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute position where the current block begins inside history.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match so later data can
                // match into it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may be < abs_pos due to backwards extension.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with what the encoder will
                // reconstruct; the returned code itself is not needed here.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index tail positions that still have a full 4-byte hash key.
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        // Flush any literals left over after the last match.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2243
2244 fn ensure_tables(&mut self) {
2245 if self.hash_table.is_empty() {
2246 self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2247 self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2248 }
2249 }
2250
2251 fn compact_history(&mut self) {
2252 if self.history_start == 0 {
2253 return;
2254 }
2255 if self.history_start >= self.max_window_size
2256 || self.history_start * 2 >= self.history.len()
2257 {
2258 self.history.drain(..self.history_start);
2259 self.history_start = 0;
2260 }
2261 }
2262
    /// The still-addressable portion of `history` (evicted prefix skipped).
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
2266
2267 fn history_abs_end(&self) -> usize {
2268 self.history_abs_start + self.live_history().len()
2269 }
2270
2271 fn hash_position(&self, data: &[u8]) -> usize {
2272 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2273 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2274 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2275 }
2276
2277 fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2278 let rel = abs_pos.checked_sub(self.position_base)?;
2279 let rel_u32 = u32::try_from(rel).ok()?;
2280 (rel_u32 < u32::MAX).then_some(rel_u32)
2284 }
2285
    /// Rebases the 32-bit relative position space when `abs_pos` is about to
    /// exceed it, rebuilding the tables from the live history.
    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
        // Keep one value of headroom because entries are stored as rel + 1.
        let needs_rebase = self
            .relative_position(abs_pos)
            .is_none_or(|relative| relative >= u32::MAX - 1);
        if !needs_rebase {
            return;
        }

        self.position_base = self.history_abs_start;
        // Every stored entry encodes the old base; they are all invalid now.
        self.hash_table.fill(HC_EMPTY);
        self.chain_table.fill(HC_EMPTY);

        // Re-index everything still addressable below abs_pos.
        let history_start = self.history_abs_start;
        for pos in history_start..abs_pos {
            self.insert_position_no_rebase(pos);
        }
    }
2306
    /// Inserts `abs_pos` into the hash/chain tables, rebasing the relative
    /// position space first if it would overflow.
    fn insert_position(&mut self, abs_pos: usize) {
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
2311
    /// Links `abs_pos` into the hash chain for its 4-byte key.
    ///
    /// Positions are stored as `relative + 1` so that 0 (`HC_EMPTY`) means
    /// "no entry". Positions lacking 4 bytes of lookahead, or outside the
    /// relative range, are silently skipped.
    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return;
        }
        let hash = self.hash_position(&concat[idx..]);
        let Some(relative_pos) = self.relative_position(abs_pos) else {
            return;
        };
        let stored = relative_pos + 1;
        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
        // Standard hash-chain insert: the new head links to the old head.
        let prev = self.hash_table[hash];
        self.chain_table[chain_idx] = prev;
        self.hash_table[hash] = stored;
    }
2328
2329 fn insert_positions(&mut self, start: usize, end: usize) {
2330 for pos in start..end {
2331 self.insert_position(pos);
2332 }
2333 }
2334
    /// Walks the hash chain for `abs_pos` and collects up to `search_depth`
    /// candidate positions, newest first.
    ///
    /// The returned array is terminated by `usize::MAX` sentinels. Chain
    /// slots can be overwritten by unrelated keys, so the walk is bounded by
    /// `search_depth * 4` total steps and stops on self-loops.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        let mut steps = 0;
        // Allow some wasted steps on stale or out-of-range entries before
        // abandoning the chain entirely.
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            // Stored values are relative_pos + 1.
            let candidate_rel = cur.wrapping_sub(1) as usize;
            let candidate_abs = self.position_base + candidate_rel;
            let next = self.chain_table[candidate_rel & chain_mask];
            steps += 1;
            if next == cur {
                // Self-loop: record the candidate if valid, then stop to
                // avoid spinning forever.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Skip entries that fell out of the live window or sit at/after
            // the position being searched.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
2383
2384 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2385 let rep = self.repcode_candidate(abs_pos, lit_len);
2386 let hash = self.hash_chain_candidate(abs_pos, lit_len);
2387 Self::better_candidate(rep, hash)
2388 }
2389
2390 fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2391 let concat = self.live_history();
2392 let current_idx = abs_pos - self.history_abs_start;
2393 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2394 return None;
2395 }
2396
2397 let mut best: Option<MatchCandidate> = None;
2398 for candidate_abs in self.chain_candidates(abs_pos) {
2399 if candidate_abs == usize::MAX {
2400 break;
2401 }
2402 let candidate_idx = candidate_abs - self.history_abs_start;
2403 let match_len =
2404 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2405 if match_len >= HC_MIN_MATCH_LEN {
2406 let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
2407 best = Self::better_candidate(best, Some(candidate));
2408 if best.is_some_and(|b| b.match_len >= self.target_len) {
2409 return best;
2410 }
2411 }
2412 }
2413 best
2414 }
2415
    /// Probes the three repeat offsets (zstd repcodes) at `abs_pos`.
    ///
    /// When `lit_len == 0` the third probe becomes `offset_hist[0] - 1`
    /// instead of repcode 1, mirroring the zstd offset-encoding rules for
    /// zero-literal sequences.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // The offset must reach back into addressable history.
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
2456
    /// Grows a match backwards over pending literals while the preceding
    /// bytes keep matching, shortening the literal run for free.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        // Never extend past the start of the pending literal run.
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
2481
2482 fn better_candidate(
2483 lhs: Option<MatchCandidate>,
2484 rhs: Option<MatchCandidate>,
2485 ) -> Option<MatchCandidate> {
2486 match (lhs, rhs) {
2487 (None, other) | (other, None) => other,
2488 (Some(lhs), Some(rhs)) => {
2489 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
2490 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
2491 if rhs_gain > lhs_gain {
2492 Some(rhs)
2493 } else {
2494 Some(lhs)
2495 }
2496 }
2497 }
2498 }
2499
2500 fn match_gain(match_len: usize, offset: usize) -> i32 {
2501 debug_assert!(
2502 offset > 0,
2503 "zstd offsets are 1-indexed, offset=0 is invalid"
2504 );
2505 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
2506 (match_len as i32) * 4 - offset_bits
2507 }
2508
    /// Lazy-matching heuristic: accept `best` only if starting a match one or
    /// two positions later would not yield a clearly better gain.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        // Long-enough matches are always taken; also bail out when lookahead
        // would run off the end of the available history.
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // The +4 bias means deferral requires a clear improvement.
        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                // A better match starts one byte later; emit a literal now.
                return None;
            }
        }

        // Depth-2 lazy search needs an even larger improvement to defer.
        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
2550}
2551
#[test]
fn matches() {
    // End-to-end check for the simple matcher: feed blocks, replay the
    // emitted sequences like a decoder would, and compare with the input.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder-side replay of one sequence into `reconstructed`.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            // Byte-by-byte copy: the match may overlap its own output when
            // offset < match_len, so a bulk copy would be wrong.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    // Block exhausted: no further sequences may be produced.
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    // skip_matching indexes the block but emits nothing, so the decoder side
    // must append these bytes manually to stay in sync.
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
2671
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Two identical highly-repetitive blocks: the second must round-trip
    // using matches that reach into the first.
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let block_a: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let block_b: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder-side replay of an emitted sequence.
    let apply = |out: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => out.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            out.extend_from_slice(literals);
            let from = out.len() - offset;
            // Byte-wise copy handles overlapping matches correctly.
            for step in 0..match_len {
                let byte = out[from + step];
                out.push(byte);
            }
        }
    };

    matcher.add_data(block_a.clone(), |_| {});
    let mut decoded = Vec::new();
    matcher.start_matching(|seq| apply(&mut decoded, seq));
    assert_eq!(decoded, block_a);

    matcher.add_data(block_b.clone(), |_| {});
    let first_len = decoded.len();
    matcher.start_matching(|seq| apply(&mut decoded, seq));

    assert_eq!(&decoded[first_len..], block_b.as_slice());
}
2706
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Default level should select the dfast backend with a 4 MiB window.
    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    // The skipped first block is not emitted, so seed the decoder with it.
    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    // Switching levels mid-stream must re-derive the window size.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
2748
#[test]
fn driver_level4_selects_row_backend() {
    let mut driver = MatchGeneratorDriver::new(32, 2);
    // Level 4 maps to the row-based matcher in the level table.
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);
}
2755
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Without a hint, Level(2) allocates the full-size dfast hash table.
    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // A tiny source hint should shrink the window and, with it, the tables.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
2785
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Level 4 uses the row backend; unhinted it gets the full row table.
    driver.reset(CompressionLevel::Level(4));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // With a small hint the row count scales down with the window log.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
2818
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Decoder-side replay of emitted sequences.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            // Byte-wise copy handles overlapping matches correctly.
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    // Second block should round-trip via matches against the first.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());

    // A block with no internal repeats must still round-trip (literals path).
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_prefix = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[third_prefix..], third_block.as_slice());
}
2862
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // 5 bytes is below the row matcher's minimum match length.
    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut saw_triple = false;
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Triple { .. } => saw_triple = true,
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
    });

    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // A repeated payload, by contrast, must produce at least one match.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        saw_triple = saw_triple || matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
2896
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    // With only 6 bytes of history, looking one position ahead would run out
    // of bounds, so the lazy heuristic must keep the seed candidate.
    let seed = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let survivor = matcher
        .pick_lazy_match(0, 0, Some(seed))
        .expect("best candidate must survive");

    assert_eq!(survivor.start, seed.start);
    assert_eq!(survivor.offset, seed.offset);
    assert_eq!(survivor.match_len, seed.match_len);
}
2916
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // "XYZ" sits in the previous block's 3-byte tail; the next block starts
    // with "XYZXYZ", so an offset-3 match must reach across the boundary.
    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let mut saw_cross_boundary = false;
    let prefix_len = reconstructed.len();
    matcher.start_matching(|seq| {
        // A literal-free offset-3 triple proves the boundary backfill worked.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 3
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
2971
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // No source-size hint is ever set in this test.
    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();

    let expected = 1 << DFAST_HASH_BITS;
    let table_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(
        table_len,
        expected,
        "unhinted Level(2) should keep default dfast table size"
    );
}
2990
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Plant a pooled store that is far too small for the next block.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    // The committed entry must have received an adequately sized store
    // rather than the undersized pooled one.
    let last_suffix_slots = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit")
        .suffixes
        .slots
        .len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3016
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    // The hinted window floor applies to both the window and the slice size.
    let window_bytes = driver.window_size() as usize;
    assert_eq!(window_bytes, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(driver.slice_size, window_bytes);

    let slice = driver.get_next_space();
    assert_eq!(slice.len(), window_bytes);
    driver.commit_space(slice);
}
3031
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    // Pool a full-size buffer first so a large allocation exists.
    let large = driver.get_next_space();
    let large_capacity = large.capacity();
    assert!(large_capacity >= 128 * 1024);
    driver.commit_space(large);

    // Shrinking the slice size must not shrink the pooled buffer.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let small = driver.get_next_space();
    assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        small.capacity() >= large_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3052
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Best uses the hash-chain backend with a 16 MiB window.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Commit some data so the HC tables actually get allocated.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    // Dropping down to Fastest switches backends entirely.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3083
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Better allocates the standard HC table geometry.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    // Best bumps hash_log/chain_log, so the tables must be reallocated.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3127
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The first committed block repeats the dictionary content exactly.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 points exactly at the dictionary copy.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
3160
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // 24-byte dictionary against an 8-byte base window.
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 24 reaches back to the very start of the dictionary.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 24
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3193
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // An empty dictionary still carries its repcode table.
    driver.prime_with_dictionary(&[], [11, 7, 3]);

    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
3203
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Level(2) selects the dfast backend.
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 points exactly at the dictionary copy.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= DFAST_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3236
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // The frame header advertises the window size; priming a dictionary only
    // extends the internal retention budget, not the advertised window.
    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3251
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // 9 bytes leave a 1-byte tail after the 8-byte slice; a tail shorter than
    // MIN_MATCH_LEN can never anchor a match and must not be committed.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    for entry in driver.match_generator.window.iter() {
        assert!(
            entry.data.len() >= MIN_MATCH_LEN,
            "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
        );
    }
}
3270
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    // 9 dictionary bytes: 8 get committed, the 1-byte tail is dropped.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3286
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let budget_before = driver.dfast_matcher().max_window_size;
    // 12 bytes: the dfast backend keys on 4-byte tails, so all 12 count.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3303
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Level 4 selects the row-based backend.
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit one full block identical to the primed dictionary bytes.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 8 && match_len >= ROW_MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "row backend should match dictionary-primed history in first full block"
    );
}
3336
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let budget_before = driver.row_matcher().max_window_size;
    // 9 bytes: only 8 are committed, so the budget grows by 8, not 9.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.row_matcher().max_window_size,
        budget_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3353
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Shrink the live window so two 8-byte blocks push out every dict byte.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let live_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.row_matcher().max_window_size, live_window + 24);

    for payload in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(payload);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        live_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3384
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"row-data");
    driver.commit_space(block);

    // The most recently committed block is visible through the row backend.
    assert_eq!(driver.get_last_space(), b"row-data");

    // Switching back to the simple backend must drop the row history.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
3401
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Touch the row matcher, then feed it some history to recycle later.
    let _ = driver.row_matcher();
    let mut block = driver.get_next_space();
    block.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(block);

    let pool_before = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    // The row tables must be emptied, with their buffers returned to the pool.
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    assert!(
        driver.vec_pool.len() >= pool_before,
        "row reset should recycle row history buffers"
    );
}
3432
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Simulate a row backend whose matcher was never materialized.
    driver.active_backend = MatcherBackend::Row;
    driver.row_match_generator = None;

    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
}
3444
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    // A zero-byte source hint must clamp the window to the minimum hinted log.
    let mut params = resolve_level_params(CompressionLevel::Level(4), None);
    params.window_log = 22;

    let adjusted = adjust_params_for_source_size(params, 0);

    assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
}
3452
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Uniform data: every later anchor finds at least as long a match, so the
    // lazy picker should always prefer to wait.
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 16;
    let current = MatchCandidate {
        start: abs_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };

    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(current)).is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
3471
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    // Depth-2 lazy matching: the picker may defer not just to the next
    // position but to position +2 when that candidate is clearly longer.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // search_depth = 0 disables chain search so only repcode candidates are
    // considered, keeping the fixture deterministic.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Crafted payload: the pattern at offset 11..30 makes the repcode match
    // at abs_pos short, and the match two bytes later substantially longer.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // Sanity: the +1 candidate must not already beat the baseline, otherwise
    // the test would not isolate the depth-2 path.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    // With a significantly better +2 candidate the picker must defer (None).
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
3508
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    // Counterpart to the "significantly better" test: a +2 candidate that is
    // only ONE byte longer does not justify skipping two positions, so the
    // picker must keep the current best.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // Chain search disabled; only repcode candidates participate.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Same fixture as the sibling test except the final byte ('Z' instead of
    // 'B') caps the +2 candidate at exactly best.match_len + 1.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    // One byte of gain does not cover the two skipped positions: keep best.
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
3540
#[test]
fn row_hash_and_row_extracts_high_bits() {
    // Re-derives the row/tag split from first principles and checks that
    // hash_and_row agrees: the hash's high (row_hash_log + ROW_TAG_BITS) bits
    // are split into a row index (upper part) and an 8-bit tag (lower part).
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Reference implementation: read the 4-byte little-endian key, multiply
    // by the Fibonacci-hashing prime, then take the high bits.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
    let hash = value.wrapping_mul(PRIME);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    let combined = hash >> (u64::BITS as usize - total_bits);
    // Row is the upper slice of the combined bits; tag is the low 8 bits.
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
3574
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.history = alloc::vec![b'a'; 20];
    matcher.history_start = 0;
    // Oldest live absolute position is 10; repcode offset 3 applied at
    // position 12 would land at 9, before the retained history.
    matcher.history_abs_start = 10;
    matcher.offset_hist = [3, 0, 0];

    assert!(matcher.repcode_candidate(12, 1).is_none());
}
3586
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Only five bytes of history; position 4 leaves a single byte of
    // lookahead, too little for a repcode match.
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.offset_hist = [1, 0, 0];

    assert!(matcher.repcode_candidate(4, 1).is_none());
}
3598
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // History too short to support a row candidate anchored at position 0.
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;

    assert!(matcher.row_candidate(0, 0).is_none());
}
3609
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut hc = HcMatchGenerator::new(32);
    // Three bytes cannot anchor a hashable suffix, so every chain slot must
    // come back as the usize::MAX sentinel.
    hc.history = b"abc".to_vec();
    hc.history_start = 0;
    hc.history_abs_start = 0;
    hc.ensure_tables();

    let candidates = hc.chain_candidates(0);
    for &candidate in candidates.iter() {
        assert_eq!(candidate, usize::MAX);
    }
}
3621
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());

    // Poison both tables so a lazy "already looks empty" reset would be caught.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    for &slot in hc.hash_table.iter().chain(hc.chain_table.iter()) {
        assert_eq!(slot, HC_EMPTY);
    }
}
3637
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(Vec::new(), |_| {});

    let mut emitted = false;
    hc.start_matching(|_| emitted = true);

    assert!(!emitted, "empty current block should not emit sequences");
}
3646
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut hc = HcMatchGenerator::new(8);
    hc.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    // 16 dead bytes ahead of history_start should trigger a drain.
    hc.history_start = 16;

    hc.compact_history();

    // After compaction the live bytes sit at the front of the buffer.
    assert_eq!(hc.history_start, 0);
    assert_eq!(hc.history, b"qrstuvwxyz");
}
3656
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abcdefghijklmnop".to_vec();
    hc.history_abs_start = 0;
    // A position_base above absolute position 0 makes the relative position
    // underflow; the insert must be a no-op rather than wrap or panic.
    hc.position_base = 1;
    hc.ensure_tables();

    let hash_snapshot = hc.hash_table.clone();
    let chain_snapshot = hc.chain_table.clone();

    hc.insert_position_no_rebase(0);

    assert_eq!(hc.hash_table, hash_snapshot);
    assert_eq!(hc.chain_table, chain_snapshot);
}
3672
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Shrink the live window so two 8-byte blocks push out every dict byte.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let live_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.match_generator.max_window_size, live_window + 24);

    for payload in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(payload);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size,
        live_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3703
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    // Shrink the live window so two 8-byte blocks push out every dict byte.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let live_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, live_window + 24);

    for payload in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(payload);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        live_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3735
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Better selects the hash-chain backend.
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit one full block identical to the primed dictionary bytes.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 8 && match_len >= HC_MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
3770
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Shrink the live window so two 8-byte blocks push out every dict byte.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let live_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, live_window + 24);

    for payload in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(payload);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        live_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3801
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    // The HC tables store u32 positions relative to position_base. When the
    // absolute stream position crosses u32::MAX, the matcher must rebase the
    // tables instead of losing or corrupting its history.
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 16-bit/32-bit targets this value does not fit in usize; the test is
    // then skipped via the early return.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    // Teleport the history past the u32 boundary so the next operation is
    // forced to rebase.
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching();
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Position 10 sits on the second "abcde" repeat, so the chains should
    // still produce at least one live candidate after the rebase.
    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
3834
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    // After a rebase the tables must contain exactly the positions that had
    // already been inserted before abs_pos — rebuilding the whole window
    // would pre-seed future positions and change match results.
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // Skipped on targets where this does not fit in usize.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Build the expected table state from scratch: a fresh matcher with the
    // same data, cleared tables, and only the prefix positions re-inserted.
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
3874
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // Capacity 1 exercises the degenerate single-bucket keying path.
    let mut suffixes = SuffixStore::with_capacity(1);
    suffixes.insert(b"abcde", 0);

    assert!(suffixes.contains_key(b"abcde"));
    assert_eq!(suffixes.get(b"abcde"), Some(0));
}
3882
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Uncompressed seeds every position; Fastest interleaves with a stride.
    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    // Better switches to the hash-chain backend with depth-2 lazy search.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), 1u64 << 23);
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
3900
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    let emitted = matcher.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"abcde",
            offset: 5,
            match_len: 10,
        };
        assert_eq!(seq, expected);
    });
    assert!(emitted);

    // The freshly used offset is pushed to the front of the repcode history.
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
3922
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Zero pending literals at the repeat point; only rep1 (offset 10) in the
    // history can explain the match, so it must be probed before the hash map.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 10, 4];

    let tail = &matcher.window.last().unwrap().data[10..];
    assert_eq!(matcher.repcode_candidate(tail, 0), Some((10, 10)));
}
3939
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Retire the first block into history, then stage an identical one so the
    // repcode target lives entirely in the older window entry.
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.offset_hist = [99, 10, 4];

    let current = &matcher.window.last().unwrap().data;
    assert_eq!(matcher.repcode_candidate(current, 0), Some((10, 10)));
}
3960
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Only rep2 (offset 10) explains the repeat; rep0/rep1 are decoys.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 4, 10];

    let tail = &matcher.window.last().unwrap().data[10..];
    assert_eq!(matcher.repcode_candidate(tail, 0), Some((10, 10)));
}
3977
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Zero-literal special case: rep0 - 1 (11 - 1 = 10) is the only
    // candidate that explains the repeat.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [11, 4, 99];

    let tail = &matcher.window.last().unwrap().data[10..];
    assert_eq!(matcher.repcode_candidate(tail, 0), Some((10, 10)));
}
3994
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    // Offset 14 reaches past the searchable prefix (10 history + 3 current).
    let tail = &matcher.window.last().unwrap().data[3..];
    assert_eq!(matcher.offset_match_len(14, tail), None);
}
4014
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Skipping must still register all positions, including odd offsets the
    // interleaved fill step would normally pass over.
    matcher.skip_matching();
    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    let emitted = matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    });
    assert!(emitted);
    assert!(!matcher.next_sequence(|_| {}));
}
4039
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // Position 5 is the last anchor with a full MIN_MATCH_LEN suffix; it must
    // be registered even though the interleaved stride would skip it.
    let entry = matcher.window.last().unwrap();
    assert_eq!(entry.suffixes.get(&entry.data[5..10]), Some(5));
}
4055
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    // An end index below MIN_MATCH_LEN cannot cover even one full suffix, so
    // nothing should be registered.
    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    assert_eq!(entry.suffixes.get(&entry.data[..MIN_MATCH_LEN]), None);
}
4072
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // With a stride of 3, anchors 0, 3, 6, 9, 12 must all be registered.
    let entry = matcher.window.last().unwrap();
    for pos in (0usize..=12).step_by(FAST_HASH_FILL_STEP) {
        let key = &entry.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            entry.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
4095
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Three 6-byte blocks against a 16-byte window: the first block gets
    // evicted before the final block is matched.
    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Seed the decoder view with the still-live history block and replay the
    // emitted sequences on top of it.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let anchor = decoded.len() - offset;
            // Byte-by-byte copy: matches may overlap their own output.
            for step in 0..match_len {
                let byte = decoded[anchor + step];
                decoded.push(byte);
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
4125
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    // Both blocks hold 8 bytes inside 64-byte allocations, so a report based
    // on capacity instead of length would be wrong by a factor of eight.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    matcher.add_data(second, |data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
4148
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Two 8-byte blocks, each backed by a 64-byte allocation.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");
    matcher.add_data(second, |_| {});

    // Shrinking the window forces the older block out on the next trim.
    matcher.max_window_size = 8;

    let mut evicted_len = None;
    matcher.trim_to_window(|data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
4174
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    // The dfast backend must hash the tail positions of a finished block so
    // the next block can match against them immediately (zero literals).
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // First block: unique bytes, so it emits pure literals and seeds history.
    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    // Second block starts with "bcdea", the 5-byte tail of the first block —
    // only a tail-inserted hash entry can anchor this match with no literals.
    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                // Offset 5 points at "bcdea" in the first block; the match
                // extends over its own output for the full 11 bytes.
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                let start = history.len() - offset;
                // Byte-by-byte replay because the match overlaps its output.
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}
4219
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    // Regression fixture (iteration 23 of a fuzz-style sweep): with a source
    // size hint set, Fastest-level sequences must still losslessly
    // reconstruct the input and reference only already-produced bytes.

    // Deterministic LCG byte generator (same constants as Knuth's MMIX).
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        let mut data = Vec::with_capacity(len);
        for _ in 0..len {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    // i = 23 gives len = 2047; appending bytes 128..256 twice guarantees at
    // least one long repeated region for the matcher to find.
    let i = 23u64;
    let len = (i * 89 % 16384) as usize;
    let mut data = generate_data(i, len);
    let repeat = data[128..256].to_vec();
    data.extend_from_slice(&repeat);
    data.extend_from_slice(&repeat);

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    // The hint is set before reset so level parameters are derived from it.
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);
    let mut space = driver.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    driver.commit_space(space);

    // Replay the emitted sequences like a decoder would and compare.
    let mut rebuilt = Vec::with_capacity(data.len());
    let mut saw_triple = false;
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            saw_triple = true;
            rebuilt.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            let start = rebuilt.len() - offset;
            // Byte-by-byte copy: matches may overlap their own output.
            for idx in 0..match_len {
                let b = rebuilt[start + idx];
                rebuilt.push(b);
            }
        }
    });

    assert!(saw_triple, "fixture must emit at least one match");
    assert_eq!(rebuilt, data);
}