1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
// --- Simple (suffix-table) matcher tuning ---
// Minimum match length emitted by the simple matcher; suffix-store keys are
// exactly this many bytes.
const MIN_MATCH_LEN: usize = 5;
// Stride for which the unrolled hash-fill path is used (see
// `add_suffixes_interleaved_fast`).
const FAST_HASH_FILL_STEP: usize = 3;
// --- Double-fast (dfast) matcher tuning (implementation not in this chunk) ---
const DFAST_MIN_MATCH_LEN: usize = 6;
const ROW_MIN_MATCH_LEN: usize = 6;
// Match length at which the dfast search stops looking for something better.
const DFAST_TARGET_LEN: usize = 48;
// log2 of the dfast hash table size; may be shrunk per-window, see
// `dfast_hash_bits_for_window`.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
// Sentinel for an unused dfast table slot.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
// --- Row-based matcher tuning (implementation not in this chunk) ---
const ROW_HASH_BITS: usize = 20;
// log2 of entries per row.
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
// Bits of the hash kept as a per-entry tag for cheap candidate filtering.
const ROW_TAG_BITS: usize = 8;
const ROW_EMPTY_SLOT: usize = usize::MAX;
const ROW_HASH_KEY_LEN: usize = 4;

// --- Hash-chain matcher defaults (overridden per level in LEVEL_TABLE) ---
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Sentinel for an empty hash-chain head (position 0 is reserved).
const HC_EMPTY: u32 = 0;

// Hard upper bound on hash-chain search depth regardless of level config.
const MAX_HC_SEARCH_DEPTH: usize = 32;
48
/// Tuning knobs for the hash-chain matcher backend.
#[derive(Copy, Clone)]
struct HcConfig {
    // log2 of the hash table size.
    hash_log: usize,
    // log2 of the chain table size (how far back chains can reach).
    chain_log: usize,
    // Maximum number of chain links followed per search.
    search_depth: usize,
    // Match length at which the search stops early.
    target_len: usize,
}
58
/// Tuning knobs for the row-based matcher backend.
#[derive(Copy, Clone)]
struct RowConfig {
    // log2 of the hash table (number of rows is derived from this and row_log).
    hash_bits: usize,
    // log2 of entries per row.
    row_log: usize,
    // Maximum candidates examined per search.
    search_depth: usize,
    // Match length at which the search stops early.
    target_len: usize,
}
66
/// Default hash-chain configuration, assembled from the HC_* constants above.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};
73
/// Aggressive hash-chain configuration used by the `Best` level
/// (LEVEL_TABLE entry 10): larger tables, deeper search, longer target.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};
81
/// Default row-matcher configuration, assembled from the ROW_* constants above.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
88
/// Fully-resolved parameter set for one compression level.
///
/// Carries configs for every backend; only the one selected by `backend`
/// is consulted at runtime.
#[derive(Copy, Clone)]
struct LevelParams {
    // Which match-finder implementation this level uses.
    backend: MatcherBackend,
    // log2 of the maximum window size in bytes.
    window_log: u8,
    // Stride used by the simple matcher when filling its suffix table.
    hash_fill_step: usize,
    // How many positions ahead lazy matching may look (0 = greedy).
    lazy_depth: u8,
    hc: HcConfig,
    row: RowConfig,
}
99
100fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
101 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
102 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
103}
104
105fn row_hash_bits_for_window(max_window_size: usize) -> usize {
106 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
107 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
108}
109
/// Per-level parameter table, indexed by `level - 1` (levels 1..=22).
///
/// Level 1 uses the simple matcher, 2-3 dfast, 4 the row matcher, and
/// 5+ hash chains with progressively larger tables, deeper searches and
/// longer target lengths. Levels 17-22 currently share one configuration.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // level 1
    LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    // levels 2-3
    LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    // level 4
    LevelParams { backend: MatcherBackend::Row, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    // levels 5 and up: hash chains
    LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
    // level 7 (the `Better` alias)
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
    // level 11 (the `Best` alias)
    LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    // levels 17-22: identical for now; kept distinct for future tuning
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
];
145
/// Smallest window log any level may use; also the floor when deriving
/// hash sizes and source-size-adjusted window logs.
const MIN_WINDOW_LOG: u8 = 10;
148
149fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
158 let src_log = if src_size == 0 {
162 MIN_WINDOW_LOG
163 } else {
164 (64 - (src_size - 1).leading_zeros()) as u8 };
166 let src_log = src_log.max(MIN_WINDOW_LOG);
167 if src_log < params.window_log {
168 params.window_log = src_log;
169 }
170 if params.backend == MatcherBackend::HashChain {
174 if (src_log + 2) < params.hc.hash_log as u8 {
175 params.hc.hash_log = (src_log + 2) as usize;
176 }
177 if (src_log + 1) < params.hc.chain_log as u8 {
178 params.hc.chain_log = (src_log + 1) as usize;
179 }
180 } else if params.backend == MatcherBackend::Row {
181 let max_window_size = 1usize << params.window_log;
182 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
183 }
184 params
185}
186
187fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
190 let params = match level {
191 CompressionLevel::Uncompressed => LevelParams {
192 backend: MatcherBackend::Simple,
193 window_log: 17,
194 hash_fill_step: 1,
195 lazy_depth: 0,
196 hc: HC_CONFIG,
197 row: ROW_CONFIG,
198 },
199 CompressionLevel::Fastest => LEVEL_TABLE[0],
200 CompressionLevel::Default => LEVEL_TABLE[2],
201 CompressionLevel::Better => LEVEL_TABLE[6],
202 CompressionLevel::Best => LEVEL_TABLE[10],
203 CompressionLevel::Level(n) => {
204 if n > 0 {
205 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
206 LEVEL_TABLE[idx]
207 } else if n == 0 {
208 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
210 } else {
211 let acceleration =
215 (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
216 let step = (acceleration + 3).min(128);
217 LevelParams {
218 backend: MatcherBackend::Simple,
219 window_log: 17,
220 hash_fill_step: step,
221 lazy_depth: 0,
222 hc: HC_CONFIG,
223 row: ROW_CONFIG,
224 }
225 }
226 }
227 };
228 if let Some(size) = source_size {
229 adjust_params_for_source_size(params, size)
230 } else {
231 params
232 }
233}
234
/// The match-finder implementation a compression level runs on.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    // Suffix-table greedy matcher (fast, lowest ratio).
    Simple,
    // Double-fast matcher.
    Dfast,
    // Row-based (tagged rows) matcher.
    Row,
    // Hash-chain matcher (slowest, best ratio).
    HashChain,
}
242
/// Front-end that owns all matcher backends, switches between them per
/// compression level, and recycles buffers across frames via pools.
pub struct MatchGeneratorDriver {
    // Recycled data buffers handed back out by `get_next_space`.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores for the simple backend.
    suffix_pool: Vec<SuffixStore>,
    // The simple backend (always present).
    match_generator: MatchGenerator,
    // Lazily-created backends; populated by `reset` on first use.
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    // Which backend is currently in use.
    active_backend: MatcherBackend,
    // Current per-commit buffer size (base, capped at the window size).
    slice_size: usize,
    base_slice_size: usize,
    // Window size advertised to callers via `window_size()`.
    reported_window_size: usize,
    // Extra window bytes granted for primed dictionary content; paid back
    // as the dictionary data is evicted (see `retire_dictionary_budget`).
    dictionary_retained_budget: usize,
    // One-shot source size hint consumed by the next `reset`.
    source_size_hint: Option<u64>,
}
263
impl MatchGeneratorDriver {
    /// Creates a driver whose simple backend may buffer up to
    /// `max_slices_in_window * slice_size` bytes.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            row_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Resolves level parameters; thin wrapper kept for call-site brevity.
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    // The accessors below panic if the corresponding backend was never
    // initialized; `reset` must have selected that backend first.

    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Pays back dictionary window budget as primed dictionary bytes are
    /// evicted: shrinks the active backend's `max_window_size` by the
    /// reclaimed amount so the temporary dictionary extension disappears
    /// once the dictionary has scrolled out of the window.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// After `retire_dictionary_budget` shrinks the window, repeatedly
    /// evicts now-out-of-window buffers (returning them to the pools) and
    /// retires more budget, until the window stops shrinking.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) only evicts; nothing new is reserved.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    // Collect first: `self.vec_pool` can't be borrowed
                    // inside the closure while `self` is borrowed mutably.
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
}
412
impl Matcher for MatchGeneratorDriver {
    /// All backends here can be primed with a dictionary.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }

    /// Stores a one-shot size hint; consumed by the next `reset`.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }

    /// Re-configures the driver for `level`: tears down the previous
    /// backend (recycling its buffers into the pools), selects the new
    /// backend, and sizes its window and tables — using the source size
    /// hint if one was set since the last reset.
    fn reset(&mut self, level: CompressionLevel) {
        let hint = self.source_size_hint.take();
        let hinted = hint.is_some();
        let params = Self::level_params(level, hint);
        let max_window_size = 1usize << params.window_log;
        self.dictionary_retained_budget = 0;
        // When switching backends, drain the old backend's state so its
        // buffers go back to the pools instead of lingering.
        if self.active_backend != params.backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::Row => {
                    if let Some(row) = self.row_match_generator.as_mut() {
                        // Drop the large tables outright; they are rebuilt
                        // by `configure` if this backend is used again.
                        row.row_heads = Vec::new();
                        row.row_positions = Vec::new();
                        row.row_tags = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        row.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Drop the large tables outright; rebuilt on reuse.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = params.backend;
        // Commits never exceed the window.
        self.slice_size = self.base_slice_size.min(max_window_size);
        self.reported_window_size = max_window_size;
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = params.hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = params.lazy_depth;
                // Only shrink the hash table when the window was sized from
                // a real hint; otherwise keep the full default table.
                dfast.set_hash_bits(if hinted {
                    dfast_hash_bits_for_window(max_window_size)
                } else {
                    DFAST_HASH_BITS
                });
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::Row => {
                let row = self
                    .row_match_generator
                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
                row.max_window_size = max_window_size;
                row.lazy_depth = params.lazy_depth;
                row.configure(params.row);
                if hinted {
                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
                }
                let vec_pool = &mut self.vec_pool;
                row.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = params.lazy_depth;
                hc.configure(params.hc);
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }

    /// Feeds dictionary content into the active backend so subsequent data
    /// can match against it, and seeds the repcode history.
    ///
    /// The window is temporarily grown by the committed dictionary size
    /// (tracked in `dictionary_retained_budget`) so priming does not evict
    /// real data; the budget is paid back as the dictionary scrolls out.
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Optimistically grow the window by the full dictionary size ...
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // Slices shorter than one hash key cannot be indexed, so they are
        // not worth committing.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
        };
        // Commit the dictionary slice by slice, indexing it (skip_matching)
        // without emitting sequences.
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching();
            committed_dict_budget += end - start;
            start = end;
        }

        // ... then give back the budget for any tail that was too short to
        // commit, so the window only retains what was actually primed.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Row => {
                    let matcher = self.row_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }

    /// Window size to advertise in the frame header.
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }

    /// Hands out a zeroed buffer of exactly `slice_size` bytes, reusing a
    /// pooled buffer when one is available.
    fn get_next_space(&mut self) -> Vec<u8> {
        if let Some(mut space) = self.vec_pool.pop() {
            if space.len() > self.slice_size {
                space.truncate(self.slice_size);
            }
            if space.len() < self.slice_size {
                space.resize(self.slice_size, 0);
            }
            return space;
        }
        alloc::vec![0; self.slice_size]
    }

    /// Borrows the most recently committed buffer.
    /// Panics if nothing has been committed yet (simple backend).
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
            MatcherBackend::Row => self.row_matcher().get_last_space(),
            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
        }
    }

    /// Appends `space` to the active backend's window. Buffers evicted to
    /// make room are recycled into the pools, and any dictionary window
    /// budget they carried is retired.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix store only if it is large enough.
                let suffixes = match self.suffix_pool.pop() {
                    Some(store) if store.slots.len() >= space.len() => store,
                    _ => SuffixStore::with_capacity(space.len()),
                };
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Row => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.row_match_generator
                    .as_mut()
                    .expect("row backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }

    /// Runs the active backend over all uncommitted data, emitting
    /// literal/match sequences through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        match self.active_backend {
            MatcherBackend::Simple => {
                while self.match_generator.next_sequence(&mut handle_sequence) {}
            }
            MatcherBackend::Dfast => self
                .dfast_matcher_mut()
                .start_matching(&mut handle_sequence),
            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
        }
    }
    /// Indexes the last committed data without emitting sequences
    /// (used when priming dictionaries).
    fn skip_matching(&mut self) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.skip_matching(),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
            MatcherBackend::Row => self.row_matcher_mut().skip_matching(),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
        }
    }
}
756
/// Fixed-size, lossy hash table mapping 5-byte suffixes to window
/// positions. Each hash value owns exactly one bucket; lookups may return
/// a colliding position, so callers must re-verify matches byte-by-byte.
struct SuffixStore {
    // One bucket per hash value; `None` marks an empty bucket. Positions
    // are stored biased by +1 so `Option<NonZeroUsize>` stays niche-packed.
    slots: Vec<Option<NonZeroUsize>>,
    // floor(log2(slots.len())), cached for the hash shift in `key`.
    len_log: u32,
}

impl SuffixStore {
    /// Creates a store with `capacity` buckets. `capacity` must be non-zero
    /// (the log2 computation panics on zero).
    fn with_capacity(capacity: usize) -> Self {
        let slots = (0..capacity).map(|_| None).collect();
        Self {
            len_log: capacity.ilog2(),
            slots,
        }
    }

    /// Records `idx` as the position for `suffix`, overwriting whatever
    /// previously occupied the bucket.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let bucket = self.key(suffix);
        // +1 bias keeps the value non-zero; `get` undoes it.
        self.slots[bucket] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// True if `suffix`'s bucket is occupied (possibly by a collision).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        self.slots[self.key(suffix)].is_some()
    }

    /// Returns the position recorded in `suffix`'s bucket, if any.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        self.slots[self.key(suffix)].map(|stored| stored.get() - 1)
    }

    /// Hashes the first five bytes of `suffix` into a bucket index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // A single-bucket table always hashes to slot 0 (and shifting by
        // the full 64 bits below would be invalid).
        if self.len_log == 0 {
            return 0;
        }

        const POLY: u64 = 0xCF3BCCDCABu64;

        // Multiplicative hash: place each byte at a distinct bit offset,
        // multiply by the polynomial constant, and fold with XOR.
        let mut acc = 0u64;
        for (i, &byte) in suffix[..5].iter().enumerate() {
            acc ^= ((byte as u64) << (24 + 8 * i)).wrapping_mul(POLY);
        }

        // Keep the top `len_log` bits; the modulo guards capacities that
        // are not exact powers of two.
        ((acc >> (64 - self.len_log)) as usize) % self.slots.len()
    }
}
819
/// One committed slice of the simple matcher's window.
struct WindowEntry {
    // The slice's bytes.
    data: Vec<u8>,
    // Suffix hash table indexing positions within `data`.
    suffixes: SuffixStore,
    // Distance (in bytes) from the start of this entry to the start of the
    // newest entry; used to turn entry-local indices into match offsets.
    base_offset: usize,
}
829
/// The "simple" backend: a greedy matcher over per-slice suffix tables.
pub(crate) struct MatchGenerator {
    // Maximum total bytes kept across all window entries.
    max_window_size: usize,
    // Committed slices, oldest first; matching runs over the last entry.
    window: Vec<WindowEntry>,
    // Total bytes currently held in `window`.
    window_size: usize,
    // Debug-only concatenation of the whole window, used to cross-check
    // matches in `next_sequence`.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    // Cursor into the last entry: next position to try matching at.
    suffix_idx: usize,
    // Position just past the last emitted sequence (start of pending literals).
    last_idx_in_sequence: usize,
    // Stride for suffix-table fills (larger = faster, worse ratio).
    hash_fill_step: usize,
    // Repcode history, updated via `encode_offset_with_history`.
    offset_hist: [u32; 3],
}
845
846impl MatchGenerator {
847 #[inline(always)]
848 #[cfg(target_endian = "little")]
849 fn mismatch_byte_index(diff: usize) -> usize {
850 diff.trailing_zeros() as usize / 8
851 }
852
853 #[inline(always)]
854 #[cfg(target_endian = "big")]
855 fn mismatch_byte_index(diff: usize) -> usize {
856 diff.leading_zeros() as usize / 8
857 }
858
859 fn new(max_size: usize) -> Self {
861 Self {
862 max_window_size: max_size,
863 window: Vec::new(),
864 window_size: 0,
865 #[cfg(debug_assertions)]
866 concat_window: Vec::new(),
867 suffix_idx: 0,
868 last_idx_in_sequence: 0,
869 hash_fill_step: 1,
870 offset_hist: [1, 4, 8],
871 }
872 }
873
874 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
875 self.window_size = 0;
876 #[cfg(debug_assertions)]
877 self.concat_window.clear();
878 self.suffix_idx = 0;
879 self.last_idx_in_sequence = 0;
880 self.offset_hist = [1, 4, 8];
881 self.window.drain(..).for_each(|entry| {
882 reuse_space(entry.data, entry.suffixes);
883 });
884 }
885
    /// Produces the next sequence (literals, or literals + match) from the
    /// last window entry, invoking `handle_sequence` with it.
    ///
    /// Returns `true` if a sequence was emitted and more data may remain,
    /// `false` once the entry is exhausted. Greedy: takes the best match
    /// found at the current position (repcodes and suffix-table hits).
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // Cursor past the end: flush any pending literals, then signal done.
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            let data_slice = &data_slice[self.suffix_idx..];
            // Tail shorter than a key: it can never match, emit as literals.
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Try repcodes first, then probe every window entry's suffix
            // table; keep the longest match (ties broken by smaller offset).
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // Debug builds verify the match against a flat copy
                        // of the whole window.
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // Index the suffixes covered by the match before advancing.
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Update the repcode history; the encoded value itself is
                // recomputed by the block encoder, so it is discarded here.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: index its suffix (first writer
            // wins) and move on one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
993
994 #[inline(always)]
996 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
997 let max = a.len().min(b.len());
998 let chunk = core::mem::size_of::<usize>();
999 let mut off = 0usize;
1000 let lhs = a.as_ptr();
1001 let rhs = b.as_ptr();
1002
1003 while off + chunk <= max {
1004 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
1005 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
1006 let diff = lhs_word ^ rhs_word;
1007 if diff != 0 {
1008 return off + Self::mismatch_byte_index(diff);
1009 }
1010 off += chunk;
1011 }
1012
1013 off + core::iter::zip(&a[off..max], &b[off..max])
1014 .take_while(|(x, y)| x == y)
1015 .count()
1016 }
1017
    /// Indexes suffixes of the last entry from the current cursor up to
    /// (but not including) `idx`, at stride `fill_step`, so bytes skipped
    /// over by a match remain findable later.
    ///
    /// Only positions with a full `MIN_MATCH_LEN`-byte suffix before `idx`
    /// are inserted; the final such position is always inserted regardless
    /// of stride so the match tail stays indexed.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entries shorter than one key have nothing indexable.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Last position whose 5-byte suffix still ends at or before `idx`.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            // Use the unrolled path for the stride it was specialized for.
            if fill_step == FAST_HASH_FILL_STEP {
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Always index the suffix ending exactly at `idx`, even if the
        // stride would have skipped it.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
1049
1050 #[inline(always)]
1051 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1052 debug_assert!(
1053 pos + MIN_MATCH_LEN <= data.len(),
1054 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1055 pos,
1056 MIN_MATCH_LEN,
1057 data.len()
1058 );
1059 let key = &data[pos..pos + MIN_MATCH_LEN];
1060 if !suffixes.contains_key(key) {
1061 suffixes.insert(key, pos);
1062 }
1063 }
1064
1065 #[inline(always)]
1066 fn add_suffixes_interleaved_fast(
1067 data: &[u8],
1068 suffixes: &mut SuffixStore,
1069 start: usize,
1070 insert_limit: usize,
1071 ) {
1072 let lane = FAST_HASH_FILL_STEP;
1073 let mut pos = start;
1074
1075 while pos + lane * 3 < insert_limit {
1078 let p0 = pos;
1079 let p1 = pos + lane;
1080 let p2 = pos + lane * 2;
1081 let p3 = pos + lane * 3;
1082
1083 Self::insert_suffix_if_absent(data, suffixes, p0);
1084 Self::insert_suffix_if_absent(data, suffixes, p1);
1085 Self::insert_suffix_if_absent(data, suffixes, p2);
1086 Self::insert_suffix_if_absent(data, suffixes, p3);
1087
1088 pos += lane * 4;
1089 }
1090
1091 while pos < insert_limit {
1092 Self::insert_suffix_if_absent(data, suffixes, pos);
1093 pos += lane;
1094 }
1095 }
1096
1097 fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
1098 if literals_len != 0 {
1099 return None;
1100 }
1101
1102 let reps = [
1103 Some(self.offset_hist[1] as usize),
1104 Some(self.offset_hist[2] as usize),
1105 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1106 ];
1107
1108 let mut best: Option<(usize, usize)> = None;
1109 let mut seen = [0usize; 3];
1110 let mut seen_len = 0usize;
1111 for offset in reps.into_iter().flatten() {
1112 if offset == 0 {
1113 continue;
1114 }
1115 if seen[..seen_len].contains(&offset) {
1116 continue;
1117 }
1118 seen[seen_len] = offset;
1119 seen_len += 1;
1120
1121 let Some(match_len) = self.offset_match_len(offset, data_slice) else {
1122 continue;
1123 };
1124 if match_len < MIN_MATCH_LEN {
1125 continue;
1126 }
1127
1128 if best.is_none_or(|(old_offset, old_len)| {
1129 match_len > old_len || (match_len == old_len && offset < old_offset)
1130 }) {
1131 best = Some((offset, match_len));
1132 }
1133 }
1134 best
1135 }
1136
    /// Length of the match found by looking `offset` bytes back from the
    /// current position, or `None` when the offset is zero, the window is
    /// empty, or the offset reaches outside the searchable prefix.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes behind the cursor that have been indexed: the whole window
        // minus the not-yet-processed tail of the last entry.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Locate the window entry (and index within it) that lies `offset`
        // bytes behind the cursor, walking older entries backwards.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            // Target is inside the current (last) entry.
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        // NOTE: the match is only extended to the end of the older entry;
        // it does not continue across entry boundaries.
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }
1171
1172 fn skip_matching(&mut self) {
1174 let len = self.window.last().unwrap().data.len();
1175 self.add_suffixes_till(len, 1);
1176 self.suffix_idx = len;
1177 self.last_idx_in_sequence = len;
1178 }
1179
1180 fn add_data(
1183 &mut self,
1184 data: Vec<u8>,
1185 suffixes: SuffixStore,
1186 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
1187 ) {
1188 assert!(
1189 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
1190 );
1191 self.reserve(data.len(), reuse_space);
1192 #[cfg(debug_assertions)]
1193 self.concat_window.extend_from_slice(&data);
1194
1195 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
1196 for entry in self.window.iter_mut() {
1197 entry.base_offset += last_len;
1198 }
1199 }
1200
1201 let len = data.len();
1202 self.window.push(WindowEntry {
1203 data,
1204 suffixes,
1205 base_offset: 0,
1206 });
1207 self.window_size += len;
1208 self.suffix_idx = 0;
1209 self.last_idx_in_sequence = 0;
1210 }
1211
1212 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1215 assert!(self.max_window_size >= amount);
1216 while self.window_size + amount > self.max_window_size {
1217 let removed = self.window.remove(0);
1218 self.window_size -= removed.data.len();
1219 #[cfg(debug_assertions)]
1220 self.concat_window.drain(0..removed.data.len());
1221
1222 let WindowEntry {
1223 suffixes,
1224 data: leaked_vec,
1225 base_offset: _,
1226 } = removed;
1227 reuse_space(leaked_vec, suffixes);
1228 }
1229 }
1230}
1231
/// Matcher with two hash tables — one keyed on 4-byte values, one on
/// 8-byte values (see `hash4`/`hash8`) — plus repcode probing and an
/// optional lazy look-ahead.
struct DfastMatchGenerator {
    max_window_size: usize,
    // Chunks currently making up the window, oldest first.
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    // Flattened copy of the window used for matching; may keep a dead
    // prefix of `history_start` bytes until `compact_history` trims it.
    history: Vec<u8>,
    history_start: usize,
    // Absolute stream position of `history[history_start]`.
    history_abs_start: usize,
    // Recent-offset (repcode) history, seeded with [1, 4, 8].
    offset_hist: [u32; 3],
    // Candidate buckets keyed by 4-byte hashes, newest first.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    // Candidate buckets keyed by 8-byte hashes, newest first.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    hash_bits: usize,
    // Lazy look-ahead depth; see `pick_lazy_match_shared`.
    lazy_depth: u8,
}
1248
/// A located match: absolute start position, distance back to the matched
/// data, and match length.
#[derive(Copy, Clone)]
struct MatchCandidate {
    // Absolute stream position where the match begins (may be earlier than
    // the probe position after backward extension).
    start: usize,
    // Distance back to the matched bytes.
    offset: usize,
    match_len: usize,
}
1255
1256fn best_len_offset_candidate(
1257 lhs: Option<MatchCandidate>,
1258 rhs: Option<MatchCandidate>,
1259) -> Option<MatchCandidate> {
1260 match (lhs, rhs) {
1261 (None, other) | (other, None) => other,
1262 (Some(lhs), Some(rhs)) => {
1263 if rhs.match_len > lhs.match_len
1264 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1265 {
1266 Some(rhs)
1267 } else {
1268 Some(lhs)
1269 }
1270 }
1271 }
1272}
1273
1274fn extend_backwards_shared(
1275 concat: &[u8],
1276 history_abs_start: usize,
1277 mut candidate_pos: usize,
1278 mut abs_pos: usize,
1279 mut match_len: usize,
1280 lit_len: usize,
1281) -> MatchCandidate {
1282 let min_abs_pos = abs_pos - lit_len;
1283 while abs_pos > min_abs_pos
1284 && candidate_pos > history_abs_start
1285 && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1286 {
1287 candidate_pos -= 1;
1288 abs_pos -= 1;
1289 match_len += 1;
1290 }
1291 MatchCandidate {
1292 start: abs_pos,
1293 offset: abs_pos - candidate_pos,
1294 match_len,
1295 }
1296}
1297
1298fn repcode_candidate_shared(
1299 concat: &[u8],
1300 history_abs_start: usize,
1301 offset_hist: [u32; 3],
1302 abs_pos: usize,
1303 lit_len: usize,
1304 min_match_len: usize,
1305) -> Option<MatchCandidate> {
1306 let reps = if lit_len == 0 {
1307 [
1308 Some(offset_hist[1] as usize),
1309 Some(offset_hist[2] as usize),
1310 (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
1311 ]
1312 } else {
1313 [
1314 Some(offset_hist[0] as usize),
1315 Some(offset_hist[1] as usize),
1316 Some(offset_hist[2] as usize),
1317 ]
1318 };
1319
1320 let current_idx = abs_pos - history_abs_start;
1321 if current_idx + min_match_len > concat.len() {
1322 return None;
1323 }
1324
1325 let mut best = None;
1326 for rep in reps.into_iter().flatten() {
1327 if rep == 0 || rep > abs_pos {
1328 continue;
1329 }
1330 let candidate_pos = abs_pos - rep;
1331 if candidate_pos < history_abs_start {
1332 continue;
1333 }
1334 let candidate_idx = candidate_pos - history_abs_start;
1335 let match_len =
1336 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1337 if match_len >= min_match_len {
1338 let candidate = extend_backwards_shared(
1339 concat,
1340 history_abs_start,
1341 candidate_pos,
1342 abs_pos,
1343 match_len,
1344 lit_len,
1345 );
1346 best = best_len_offset_candidate(best, Some(candidate));
1347 }
1348 }
1349 best
1350}
1351
/// Tuning knobs for the lazy-match decision in `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    // Accept immediately once a match reaches this length.
    target_len: usize,
    min_match_len: usize,
    // Look-ahead depth; values >= 2 enable the second probe.
    lazy_depth: u8,
    // Absolute end of the searchable history.
    history_abs_end: usize,
}
1359
1360fn pick_lazy_match_shared(
1361 abs_pos: usize,
1362 lit_len: usize,
1363 best: Option<MatchCandidate>,
1364 config: LazyMatchConfig,
1365 mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1366) -> Option<MatchCandidate> {
1367 let best = best?;
1368 if best.match_len >= config.target_len
1369 || abs_pos + 1 + config.min_match_len > config.history_abs_end
1370 {
1371 return Some(best);
1372 }
1373
1374 let next = best_match_at(abs_pos + 1, lit_len + 1);
1375 if let Some(next) = next
1376 && (next.match_len > best.match_len
1377 || (next.match_len == best.match_len && next.offset < best.offset))
1378 {
1379 return None;
1380 }
1381
1382 if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1383 let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1384 if let Some(next2) = next2
1385 && next2.match_len > best.match_len + 1
1386 {
1387 return None;
1388 }
1389 }
1390
1391 Some(best)
1392}
1393
impl DfastMatchGenerator {
    /// Fresh matcher; the hash tables are allocated lazily by
    /// `ensure_hash_tables`.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Repcode seed; every generator in this file starts with [1, 4, 8].
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            lazy_depth: 1,
        }
    }

    /// Adjust the hash-table size (clamped to at most `DFAST_HASH_BITS`),
    /// dropping the old tables when the size actually changes.
    fn set_hash_bits(&mut self, bits: usize) {
        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
        if self.hash_bits != clamped {
            self.hash_bits = clamped;
            self.short_hash = Vec::new();
            self.long_hash = Vec::new();
        }
    }

    /// Clear all matcher state, recycling window buffers via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // Return buffers at full capacity so callers can reuse them.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    /// The most recently added chunk.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a chunk, evicting old chunks so the window fits, and mirror
    /// it into the flat `history` buffer used for matching.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict chunks until the window fits `max_window_size` again.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Index the whole current chunk without emitting sequences.
    ///
    /// NOTE(review): unlike the row/HC matchers, this does not backfill the
    /// positions just before the chunk boundary — presumably an accepted
    /// trade-off for the fast path; confirm before relying on matches whose
    /// keys straddle the boundary.
    fn skip_matching(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Run the match loop over the current chunk, emitting `Sequence`s via
    /// `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match before emitting.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // Backward extension may have consumed pending literals, so
                // the sequence starts at `candidate.start`, not at `pos`.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with the emitted offset.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No match (or lazy deferral): index this position, step one.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Flush trailing bytes as raw literals.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// (Re)allocate both hash tables whenever the configured size changed.
    fn ensure_hash_tables(&mut self) {
        let table_len = 1usize << self.hash_bits;
        if self.short_hash.len() != table_len {
            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
        }
    }

    /// Drop the dead prefix of `history` once it is big enough (a full
    /// window, or at least half the buffer) to be worth the memmove.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    /// The still-searchable portion of the flat history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute stream position one past the end of the history.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Best of the repcode and hash-table candidates at `abs_pos`.
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let hash = self.hash_candidate(abs_pos, lit_len);
        best_len_offset_candidate(rep, hash)
    }

    /// Lazy-evaluation wrapper; see `pick_lazy_match_shared`.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: DFAST_TARGET_LEN,
                min_match_len: DFAST_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }

    /// Repcode probe; see `repcode_candidate_shared`.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            DFAST_MIN_MATCH_LEN,
        )
    }

    /// Probe the long (8-byte) table first, then the short (4-byte) table,
    /// returning early as soon as a candidate reaches `DFAST_TARGET_LEN`.
    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        let mut best = None;
        for candidate_pos in self.long_candidates(abs_pos) {
            // Stale entries may point before the live window or at/after
            // the current position; skip them.
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }

        for candidate_pos in self.short_candidates(abs_pos) {
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }
        best
    }

    /// Backward extension over pending literals; see
    /// `extend_backwards_shared`.
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }

    /// Insert every position in `start..end` into the hash tables.
    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Insert `pos` into both tables (when enough bytes remain for each
    /// key length), shifting older bucket entries down one slot.
    fn insert_position(&mut self, pos: usize) {
        let idx = pos - self.history_abs_start;
        let short = {
            let concat = self.live_history();
            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
        };
        if let Some(short) = short {
            let bucket = &mut self.short_hash[short];
            // Avoid duplicating the head entry when re-inserting.
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }

        let long = {
            let concat = self.live_history();
            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
        };
        if let Some(long) = long {
            let bucket = &mut self.long_hash[long];
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }
    }

    /// Occupied short-table candidates for the 4-byte key at `pos`.
    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 4 <= concat.len())
            .then(|| self.short_hash[self.hash4(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Occupied long-table candidates for the 8-byte key at `pos`.
    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 8 <= concat.len())
            .then(|| self.long_hash[self.hash8(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Hash of the 4-byte little-endian key at the start of `data`.
    fn hash4(&self, data: &[u8]) -> usize {
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        self.hash_index(value)
    }

    /// Hash of the 8-byte little-endian key at the start of `data`.
    fn hash8(&self, data: &[u8]) -> usize {
        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
        self.hash_index(value)
    }

    /// Multiplicative hash reduced to the top `hash_bits` bits.
    fn hash_index(&self, value: u64) -> usize {
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
    }
}
1719
/// Matcher using a row-based hash table: each hash row is a small circular
/// buffer of positions with parallel one-byte tags for cheap filtering.
struct RowMatchGenerator {
    max_window_size: usize,
    // Chunks currently making up the window, oldest first.
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    // Flattened window contents; may carry a dead prefix of
    // `history_start` bytes until `compact_history` trims it.
    history: Vec<u8>,
    history_start: usize,
    // Absolute stream position of `history[history_start]`.
    history_abs_start: usize,
    // Recent-offset (repcode) history, seeded with [1, 4, 8].
    offset_hist: [u32; 3],
    // log2 of the number of rows.
    row_hash_log: usize,
    // log2 of the number of entries per row (clamped to 4..=6 in
    // `configure`, so a head index fits in a u8).
    row_log: usize,
    // Maximum entries examined per lookup.
    search_depth: usize,
    // Stop searching once a match reaches this length.
    target_len: usize,
    // Lazy look-ahead depth; see `pick_lazy_match_shared`.
    lazy_depth: u8,
    // Per-row slot index of the newest entry.
    row_heads: Vec<u8>,
    // Candidate positions, `1 << row_log` per row; ROW_EMPTY_SLOT = empty.
    row_positions: Vec<usize>,
    // One-byte hash tags parallel to `row_positions`.
    row_tags: Vec<u8>,
}
1737
impl RowMatchGenerator {
    /// Fresh matcher; the row tables are allocated lazily by
    /// `ensure_tables`.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            offset_hist: [1, 4, 8],
            // Total hash bits split into a row selector and the row itself.
            row_hash_log: ROW_HASH_BITS - ROW_LOG,
            row_log: ROW_LOG,
            search_depth: ROW_SEARCH_DEPTH,
            target_len: ROW_TARGET_LEN,
            lazy_depth: 1,
            row_heads: Vec::new(),
            row_positions: Vec::new(),
            row_tags: Vec::new(),
        }
    }

    /// Adjust the total hash bits, dropping the tables when the row count
    /// changes.
    fn set_hash_bits(&mut self, bits: usize) {
        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
        let row_hash_log = clamped.saturating_sub(self.row_log);
        if self.row_hash_log != row_hash_log {
            self.row_hash_log = row_hash_log;
            self.row_heads.clear();
            self.row_positions.clear();
            self.row_tags.clear();
        }
    }

    /// Apply a `RowConfig`; `row_log` is clamped to 4..=6 so the per-row
    /// head index stored in `row_heads` always fits in a `u8`.
    fn configure(&mut self, config: RowConfig) {
        self.row_log = config.row_log.clamp(4, 6);
        self.search_depth = config.search_depth;
        self.target_len = config.target_len;
        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
    }

    /// Clear all matcher state, recycling window buffers via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        // `fill` on empty vectors is a no-op, so this is safe pre-allocation.
        self.row_heads.fill(0);
        self.row_positions.fill(ROW_EMPTY_SLOT);
        self.row_tags.fill(0);
        for mut data in self.window.drain(..) {
            // Return buffers at full capacity so callers can reuse them.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    /// The most recently added chunk.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a chunk, evicting old chunks so the window fits, and mirror
    /// it into the flat `history` buffer used for matching.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict chunks until the window fits `max_window_size` again.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Index the whole current chunk (including the boundary backfill)
    /// without emitting sequences.
    fn skip_matching(&mut self) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Run the match loop over the current chunk, emitting `Sequence`s via
    /// `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        // Index the tail of the previous chunk so keys straddling the
        // boundary are searchable.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + ROW_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match before emitting.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // Backward extension may have consumed pending literals, so
                // the sequence starts at `candidate.start`, not at `pos`.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with the emitted offset.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No match (or lazy deferral): index this position, step one.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index the remaining positions that still carry a full hash key so
        // the next chunk can match against them.
        while pos + ROW_HASH_KEY_LEN <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        // Flush trailing bytes as raw literals.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// (Re)allocate the row tables when the configured geometry changed.
    fn ensure_tables(&mut self) {
        let row_count = 1usize << self.row_hash_log;
        let row_entries = 1usize << self.row_log;
        let total = row_count * row_entries;
        if self.row_positions.len() != total {
            self.row_heads = alloc::vec![0; row_count];
            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
            self.row_tags = alloc::vec![0; total];
        }
    }

    /// Drop the dead prefix of `history` once it is big enough (a full
    /// window, or at least half the buffer) to be worth the memmove.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    /// The still-searchable portion of the flat history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute stream position one past the end of the history.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Hash the `ROW_HASH_KEY_LEN`-byte key at `abs_pos` into a row index
    /// plus an 8-bit tag used to pre-filter candidates. Returns `None`
    /// when fewer than `ROW_HASH_KEY_LEN` bytes remain.
    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + ROW_HASH_KEY_LEN > concat.len() {
            return None;
        }
        let value =
            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        let hash = value.wrapping_mul(PRIME);
        // Take the top `row_hash_log + ROW_TAG_BITS` bits: the high part
        // selects the row, the low byte becomes the tag.
        let total_bits = self.row_hash_log + ROW_TAG_BITS;
        let combined = hash >> (u64::BITS as usize - total_bits);
        let row_mask = (1usize << self.row_hash_log) - 1;
        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
        let tag = combined as u8;
        Some((row, tag))
    }

    /// First position before the current chunk that must be (re)indexed so
    /// keys straddling the chunk boundary are present.
    fn backfill_start(&self, current_abs_start: usize) -> usize {
        current_abs_start
            .saturating_sub(ROW_HASH_KEY_LEN - 1)
            .max(self.history_abs_start)
    }

    /// Best of the repcode and row-table candidates at `abs_pos`.
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let row = self.row_candidate(abs_pos, lit_len);
        best_len_offset_candidate(rep, row)
    }

    /// Lazy-evaluation wrapper; see `pick_lazy_match_shared`.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: self.target_len,
                min_match_len: ROW_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }

    /// Repcode probe; see `repcode_candidate_shared`.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            ROW_MIN_MATCH_LEN,
        )
    }

    /// Scan the row for `abs_pos`, newest entry first, comparing data only
    /// for entries whose tag matches; returns early once a candidate
    /// reaches `target_len`.
    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let (row, tag) = self.hash_and_row(abs_pos)?;
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        // `head` is the slot of the newest entry; older entries follow
        // circularly.
        let head = self.row_heads[row] as usize;
        let max_walk = self.search_depth.min(row_entries);

        let mut best = None;
        for i in 0..max_walk {
            let slot = (head + i) & row_mask;
            let idx = row_base + slot;
            // Tag mismatch: cheap reject without touching the data.
            if self.row_tags[idx] != tag {
                continue;
            }
            let candidate_pos = self.row_positions[idx];
            // Skip empty slots and stale/out-of-range positions.
            if candidate_pos == ROW_EMPTY_SLOT
                || candidate_pos < self.history_abs_start
                || candidate_pos >= abs_pos
            {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= ROW_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }

    /// Backward extension over pending literals; see
    /// `extend_backwards_shared`.
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }

    /// Insert every position in `start..end` into the row table.
    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Insert `abs_pos` as the newest entry of its row: move the circular
    /// head one slot back and overwrite what was the oldest entry.
    fn insert_position(&mut self, abs_pos: usize) {
        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
            return;
        };
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let next = head.wrapping_sub(1) & row_mask;
        self.row_heads[row] = next as u8;
        self.row_tags[row_base + next] = tag;
        self.row_positions[row_base + next] = abs_pos;
    }
}
2059
/// Hash-chain matcher: a head table keyed by hash plus a chain table
/// linking earlier occurrences of the same key.
struct HcMatchGenerator {
    max_window_size: usize,
    // Chunks currently making up the window, oldest first.
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    // Flattened window contents; may carry a dead prefix of
    // `history_start` bytes until `compact_history` trims it.
    history: Vec<u8>,
    history_start: usize,
    // Absolute stream position of `history[history_start]`.
    history_abs_start: usize,
    // Presumably the base subtracted from absolute positions before they
    // are stored as u32 table entries — TODO confirm against the
    // insert/lookup code (below this view).
    position_base: usize,
    // Recent-offset (repcode) history, seeded with [1, 4, 8].
    offset_hist: [u32; 3],
    // Chain heads per hash bucket; HC_EMPTY (0) marks an empty bucket.
    hash_table: Vec<u32>,
    // Links to earlier occurrences of the same hash key.
    chain_table: Vec<u32>,
    // Lazy look-ahead depth; HC defaults to 2 (see `new`).
    lazy_depth: u8,
    hash_log: usize,
    chain_log: usize,
    // Maximum chain links followed per lookup (capped in `configure`).
    search_depth: usize,
    // Stop searching once a match reaches this length.
    target_len: usize,
}
2077
2078impl HcMatchGenerator {
    /// Fresh matcher; the hash/chain tables are allocated lazily (see
    /// `ensure_tables`, used by `skip_matching`/`start_matching`).
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            position_base: 0,
            // Repcode seed; every generator in this file starts with [1, 4, 8].
            offset_hist: [1, 4, 8],
            hash_table: Vec::new(),
            chain_table: Vec::new(),
            // HC defaults to a deeper lazy search than the other matchers.
            lazy_depth: 2,
            hash_log: HC_HASH_LOG,
            chain_log: HC_CHAIN_LOG,
            search_depth: HC_SEARCH_DEPTH,
            target_len: HC_TARGET_LEN,
        }
    }
2098
2099 fn configure(&mut self, config: HcConfig) {
2100 let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
2101 self.hash_log = config.hash_log;
2102 self.chain_log = config.chain_log;
2103 self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
2104 self.target_len = config.target_len;
2105 if resize && !self.hash_table.is_empty() {
2106 self.hash_table.clear();
2108 self.chain_table.clear();
2109 }
2110 }
2111
    /// Clear all matcher state, recycling window buffers via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.position_base = 0;
        self.offset_hist = [1, 4, 8];
        if !self.hash_table.is_empty() {
            self.hash_table.fill(HC_EMPTY);
            self.chain_table.fill(HC_EMPTY);
        }
        for mut data in self.window.drain(..) {
            // Return buffers at full capacity so callers can reuse them.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
2128
    /// The most recently added chunk.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
2132
    /// Append a chunk, evicting old chunks so the window fits, and mirror
    /// it into the flat `history` buffer used for matching.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Both history markers advance so absolute positions stay valid.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
2150
2151 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2152 while self.window_size > self.max_window_size {
2153 let removed = self.window.pop_front().unwrap();
2154 self.window_size -= removed.len();
2155 self.history_start += removed.len();
2156 self.history_abs_start += removed.len();
2157 reuse_space(removed);
2158 }
2159 }
2160
2161 fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2164 let backfill_start = current_abs_start
2165 .saturating_sub(3)
2166 .max(self.history_abs_start);
2167 if backfill_start < current_abs_start {
2168 self.insert_positions(backfill_start, current_abs_start);
2169 }
2170 }
2171
2172 fn skip_matching(&mut self) {
2173 self.ensure_tables();
2174 let current_len = self.window.back().unwrap().len();
2175 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2176 self.backfill_boundary_positions(current_abs_start);
2177 self.insert_positions(current_abs_start, current_abs_start + current_len);
2178 }
2179
    /// Runs greedy/lazy hash-chain matching over the newest block and
    /// emits the resulting `Sequence`s through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute start of the block being matched; also index the last
        // few bytes of the previous block so matches can cross the seam.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match before taking
                // the immutable borrow of the block below.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start can be < abs_pos when the match was
                // extended backwards into pending literals.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with what the block
                // encoder will do when it encodes this offset.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index tail positions that still have 4 hashable bytes but are
        // too close to the end to start a match.
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        // Flush any trailing literals.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2235
2236 fn ensure_tables(&mut self) {
2237 if self.hash_table.is_empty() {
2238 self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2239 self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2240 }
2241 }
2242
2243 fn compact_history(&mut self) {
2244 if self.history_start == 0 {
2245 return;
2246 }
2247 if self.history_start >= self.max_window_size
2248 || self.history_start * 2 >= self.history.len()
2249 {
2250 self.history.drain(..self.history_start);
2251 self.history_start = 0;
2252 }
2253 }
2254
2255 fn live_history(&self) -> &[u8] {
2256 &self.history[self.history_start..]
2257 }
2258
2259 fn history_abs_end(&self) -> usize {
2260 self.history_abs_start + self.live_history().len()
2261 }
2262
2263 fn hash_position(&self, data: &[u8]) -> usize {
2264 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2265 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2266 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2267 }
2268
2269 fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2270 let rel = abs_pos.checked_sub(self.position_base)?;
2271 let rel_u32 = u32::try_from(rel).ok()?;
2272 (rel_u32 < u32::MAX).then_some(rel_u32)
2276 }
2277
    /// Rebases the 32-bit relative-position space when `abs_pos` is about
    /// to exhaust it: moves `position_base` up to the start of live
    /// history, wipes both tables, and re-indexes all live positions up to
    /// `abs_pos`. Expensive, but only triggers roughly every 4 GiB.
    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
        let needs_rebase = self
            .relative_position(abs_pos)
            .is_none_or(|relative| relative >= u32::MAX - 1);
        if !needs_rebase {
            return;
        }

        self.position_base = self.history_abs_start;
        // Existing entries encode positions against the old base, so they
        // are all invalid now; start from empty tables.
        self.hash_table.fill(HC_EMPTY);
        self.chain_table.fill(HC_EMPTY);

        let history_start = self.history_abs_start;
        for pos in history_start..abs_pos {
            // No rebase check needed: the new base makes every position in
            // this range representable.
            self.insert_position_no_rebase(pos);
        }
    }
2298
    /// Indexes `abs_pos` in the hash/chain tables, first rebasing the
    /// 32-bit relative-position space if it is close to exhaustion.
    fn insert_position(&mut self, abs_pos: usize) {
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
2303
    /// Indexes `abs_pos`: the hash head becomes this position and the old
    /// head is linked into the chain table. Positions are stored +1 so
    /// that 0 can serve as the `HC_EMPTY` sentinel.
    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        // Need 4 bytes to hash; positions near the end are not indexed.
        if idx + 4 > concat.len() {
            return;
        }
        let hash = self.hash_position(&concat[idx..]);
        let Some(relative_pos) = self.relative_position(abs_pos) else {
            return;
        };
        let stored = relative_pos + 1;
        // Chain slots are addressed by the (masked) position itself, so
        // following a stored head lands on that position's predecessor.
        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
        let prev = self.hash_table[hash];
        self.chain_table[chain_idx] = prev;
        self.hash_table[hash] = stored;
    }
2320
2321 fn insert_positions(&mut self, start: usize, end: usize) {
2322 for pos in start..end {
2323 self.insert_position(pos);
2324 }
2325 }
2326
    /// Walks the hash chain at `abs_pos` and collects up to `search_depth`
    /// earlier positions sharing the same 4-byte hash. Unused slots stay
    /// `usize::MAX`; callers stop at the first sentinel.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        let mut steps = 0;
        // Bound the total walk even when many entries are filtered out
        // (stale / out-of-window positions don't count toward `filled`).
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            // Stored values are position + 1 (see insert_position_no_rebase).
            let candidate_rel = cur.wrapping_sub(1) as usize;
            let candidate_abs = self.position_base + candidate_rel;
            let next = self.chain_table[candidate_rel & chain_mask];
            steps += 1;
            if next == cur {
                // Self-link: a chain-slot collision formed a 1-cycle. Take
                // the candidate if it's valid, then stop to avoid spinning.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Skip positions outside the live window or not strictly
            // before the current position.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
2375
2376 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2377 let rep = self.repcode_candidate(abs_pos, lit_len);
2378 let hash = self.hash_chain_candidate(abs_pos, lit_len);
2379 Self::better_candidate(rep, hash)
2380 }
2381
    /// Finds the best match reachable through the hash chain, extending
    /// each candidate backwards into pending literals. Stops early once a
    /// candidate reaches `target_len`.
    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best: Option<MatchCandidate> = None;
        for candidate_abs in self.chain_candidates(abs_pos) {
            if candidate_abs == usize::MAX {
                // Sentinel: no further candidates.
                break;
            }
            let candidate_idx = candidate_abs - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|b| b.match_len >= self.target_len) {
                    // Good enough — skip the rest of the chain.
                    return best;
                }
            }
        }
        best
    }
2407
    /// Tries the three repeat-offset ("repcode") candidates. Following the
    /// zstd convention, when there are no pending literals rep0 is skipped
    /// and rep0-1 is tried instead.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // Reject offsets reaching before the start of the stream...
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            // ...or before the start of the retained window.
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
2448
    /// Grows a match backwards over pending literals: while the byte just
    /// before both the candidate and the current position agree, the match
    /// start moves left (absorbing one literal) and the length grows. The
    /// offset is unchanged since both sides move in lockstep.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        // Never extend past the start of the pending-literal run.
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
2473
2474 fn better_candidate(
2475 lhs: Option<MatchCandidate>,
2476 rhs: Option<MatchCandidate>,
2477 ) -> Option<MatchCandidate> {
2478 match (lhs, rhs) {
2479 (None, other) | (other, None) => other,
2480 (Some(lhs), Some(rhs)) => {
2481 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
2482 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
2483 if rhs_gain > lhs_gain {
2484 Some(rhs)
2485 } else {
2486 Some(lhs)
2487 }
2488 }
2489 }
2490 }
2491
2492 fn match_gain(match_len: usize, offset: usize) -> i32 {
2493 debug_assert!(
2494 offset > 0,
2495 "zstd offsets are 1-indexed, offset=0 is invalid"
2496 );
2497 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
2498 (match_len as i32) * 4 - offset_bits
2499 }
2500
    /// Lazy-matching heuristic: accept `best` unless starting one (or, at
    /// `lazy_depth >= 2`, two) positions later yields a clearly better
    /// match. Returning `None` tells the caller to emit a literal at this
    /// position and retry at the next one.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        // Already long enough, or no room to look ahead: take it as-is.
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // +4 bias: a deferred match must beat the current one by more than
        // the cost of emitting the extra literal.
        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                return None;
            }
        }

        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                // Two deferred literals -> require an extra +4 margin.
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
2542}
2543
#[test]
fn matches() {
    // End-to-end round-trip for the simple backend: feed several blocks,
    // replay every emitted sequence like a decoder would, and verify the
    // replay reproduces the original input byte-for-byte. The number of
    // next_sequence calls per block pins how many sequences each block
    // is expected to produce.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder-style replay: literals are copied; triples are resolved
    // against already-reconstructed output, byte-by-byte so overlapping
    // matches (offset < match_len) work.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Block 1: a run of zeros -> one sequence.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    assert!(!matcher.next_sequence(|_| {}));

    // Block 2: repeating 1..=6 pattern plus a zero tail -> three sequences.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 3: fresh literals followed by a zero tail -> two sequences.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 4: zeros matching earlier history -> one sequence.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 5: repeats the 7..=11 run from block 3 -> one sequence.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 6: skip_matching indexes the block but emits no sequences, so
    // the replay buffer is patched up manually.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Block 7: identical to the skipped block -> it must still match.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 8: repetition inside a single block -> two sequences.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
2663
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Two 128 KiB blocks of the same repeating 10-byte pattern; the second
    // block must still decode correctly when matched against the first.
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let make_block = || -> Vec<u8> { pattern.iter().copied().cycle().take(128 * 1024).collect() };
    let first_block = make_block();
    let second_block = make_block();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder-style replay, byte-by-byte so overlapping matches work.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let from = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[from + i];
                decoded.push(byte);
            }
        }
    };

    let mut history = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    let prefix_len = history.len();
    matcher.add_data(second_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
2698
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Default level selects the dfast backend with a 2^22 window.
    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // First block is only indexed (skip_matching), not matched.
    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    // Second, identical block should match fully against the first.
    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    // Switching level must also resize the window (2^17 for Fastest).
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
2740
#[test]
fn driver_level4_selects_row_backend() {
    // Level 4 maps onto the row-based matcher backend.
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);
}
2747
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    // Shared helper: commit one 12-byte block and index it.
    let commit_and_index = |driver: &mut MatchGeneratorDriver, bytes: &[u8]| {
        let mut space = driver.get_next_space();
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        driver.commit_space(space);
        driver.skip_matching();
    };

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Unhinted Level(2): full-size dfast hash table.
    driver.reset(CompressionLevel::Level(2));
    commit_and_index(&mut driver, b"abcabcabcabc");
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // A 1 KiB source hint shrinks both the window and the tables.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    commit_and_index(&mut driver, b"xyzxyzxyzxyz");
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
2777
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    // Shared helper: commit one 12-byte block and index it.
    let commit_and_index = |driver: &mut MatchGeneratorDriver, bytes: &[u8]| {
        let mut space = driver.get_next_space();
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        driver.commit_space(space);
        driver.skip_matching();
    };

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Unhinted Level(4): full-size row table.
    driver.reset(CompressionLevel::Level(4));
    commit_and_index(&mut driver, b"abcabcabcabc");
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // A 1 KiB source hint shrinks both the window and the row table.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    commit_and_index(&mut driver, b"xyzxyzxyzxyz");
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
    assert_eq!(hinted_rows, 1 << ((MIN_WINDOW_LOG as usize) - ROW_LOG));
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
2807
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    // Two identical 128 KiB pattern blocks, then a non-repeating third
    // block; a decoder-style replay must reproduce all three exactly.
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Decoder-style replay, byte-by-byte so overlapping matches work.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());

    // Mostly-unique data must still round-trip.
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_prefix = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[third_prefix..], third_block.as_slice());
}
2851
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // A 5-byte block is too short for the row matcher to form a match,
    // so it must come out as pure literals.
    matcher.add_data(b"abcde".to_vec(), |_| {});
    let mut reconstructed = Vec::new();
    let mut saw_triple = false;
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple { .. } => saw_triple = true,
    });
    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Once the data repeats, a triple should appear again.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| saw_triple |= matches!(seq, Sequence::Triple { .. }));
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
2885
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    // Looking ahead from position 0 would run past the 6-byte history, so
    // the lazy heuristic must return the supplied candidate unchanged.
    let best = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let picked = matcher
        .pick_lazy_match(0, 0, Some(best))
        .expect("best candidate must survive");

    assert_eq!(picked.start, best.start);
    assert_eq!(picked.offset, best.offset);
    assert_eq!(picked.match_len, best.match_len);
}
2905
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // The first block ends in "XYZ"; the second block starts with "XYZXYZ",
    // so a cross-boundary offset-3 match exists only if the matcher
    // backfilled the previous block's 3-byte tail into its tables.
    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Decoder-style replay, byte-by-byte so overlapping matches work.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let mut saw_cross_boundary = false;
    let prefix_len = reconstructed.len();
    matcher.start_matching(|seq| {
        // The boundary match shows up as a literal-free triple at offset 3.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 3
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
2960
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(2));

    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();

    // Without a source-size hint, the dfast table keeps its default size.
    assert_eq!(
        driver.dfast_matcher().short_hash.len(),
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
2979
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Seed the pool with a suffix store far too small for the 4 KiB
    // block committed below.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    let entry = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit");
    assert!(
        entry.suffixes.slots.len() >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3005
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    // A small hint shrinks the window, and the slice size must follow it.
    let window = driver.window_size() as usize;
    assert_eq!(window, 1024);
    assert_eq!(driver.slice_size, window);

    // Handed-out buffers are sized by the (clamped) slice size.
    let space = driver.get_next_space();
    assert_eq!(space.len(), window);
    driver.commit_space(space);
}
3020
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    // Grab and return a full-size buffer so it lands in the pool.
    let big = driver.get_next_space();
    let big_capacity = big.capacity();
    assert!(big_capacity >= 128 * 1024);
    driver.commit_space(big);

    // Shrink the slice size via a source hint, then reuse the pool.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let small = driver.get_next_space();
    assert_eq!(small.len(), 1024);
    assert!(
        small.capacity() >= big_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3041
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let commit_and_index = |driver: &mut MatchGeneratorDriver, bytes: &[u8]| {
        let mut space = driver.get_next_space();
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        driver.commit_space(space);
        driver.skip_matching();
    };

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Best allocates the large HC tables...
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));
    commit_and_index(&mut driver, b"abcabcabcabc");

    // ...and dropping back to Fastest must free them again.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3072
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Better: 2^23 window; committing a block forces table allocation.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    // Best: 2^24 window; the HC tables must be re-allocated larger.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3116
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    // The block repeats the dictionary at distance 8, so a literal-free
    // triple with offset 8 should be emitted.
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple { literals, offset, match_len }
                if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
3149
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    // "abcdefgh" sits 24 bytes back in the primed dictionary, so the block
    // should be encodable as a literal-free triple at offset 24.
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple { literals, offset, match_len }
                if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3182
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    // Priming with empty content must still install the repcode history.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.prime_with_dictionary(&[], [11, 7, 3]);
    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
3192
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    // Same scenario as the simple-backend variant, but on Level(2)/dfast.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple { literals, offset, match_len }
                if literals.is_empty() && offset == 8 && match_len >= DFAST_MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3225
// The retention budget for dictionary bytes is internal bookkeeping; the
// frame window size advertised to callers must not grow when priming.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3240
// A 9-byte dictionary leaves a 1-byte tail after the 8-byte block slices.
// That tail is shorter than MIN_MATCH_LEN and must not be committed as its
// own window entry (a suffix store can never index it).
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert!(
        driver
            .match_generator
            .window
            .iter()
            .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
3259
// 9 dictionary bytes = one committed 8-byte slice + a 1-byte tail that is
// dropped; the retention budget must grow by exactly the committed 8 bytes.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let before = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.match_generator.max_window_size,
        before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3275
// For the dfast backend a 12-byte dictionary (8-byte slice + 4-byte tail)
// must be counted in full: the 4-byte tail IS committed, so the budget grows
// by all 12 bytes — unlike the simple backend's sub-MIN_MATCH_LEN tail drop.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let before = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3292
// Dictionary-priming scenario for the row backend (Level(4)): the first
// committed block must find the 8-byte primed dictionary at offset 8.
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
3325
// Row-backend analogue of the committed-tail budget test: a 9-byte
// dictionary commits only 8 bytes (the 1-byte tail is dropped), so the
// retained window budget grows by 8, not 9.
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.row_matcher().max_window_size,
        base_window + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3342
// Shrinks the row window to 8 bytes so that committing two live blocks
// evicts every primed dictionary slice; the temporary budget extension must
// then be fully retired rather than recycled for live history.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Force a tiny window so eviction happens after only two blocks.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);

    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3373
// While on the row backend, get_last_space() must expose the most recently
// committed block; switching back to Fastest must clear the row window.
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"row-data");
    driver.commit_space(space);

    assert_eq!(driver.get_last_space(), b"row-data");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
3390
// Switching from the row backend back to Simple must clear the row tables
// and return the row history buffers to the driver's reusable vec pool.
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Force row-matcher allocation, then commit data so there is something
    // to recycle on reset.
    let _ = driver.row_matcher();
    let mut space = driver.get_next_space();
    space.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(space);

    let before_pool = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    assert!(
        driver.vec_pool.len() >= before_pool,
        "row reset should recycle row history buffers"
    );
}
3421
// Resetting away from the row backend must not panic when the row matcher
// was never allocated (active_backend says Row but the generator is None).
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut gen_driver = MatchGeneratorDriver::new(8, 1);

    // Simulate an inconsistent state: row backend selected, matcher absent.
    gen_driver.row_match_generator = None;
    gen_driver.active_backend = MatcherBackend::Row;

    gen_driver.reset(CompressionLevel::Fastest);

    assert_eq!(gen_driver.active_backend, MatcherBackend::Simple);
}
3433
// A zero-byte source must clamp the window log to the minimum floor no
// matter how large the level's configured window is.
#[test]
fn adjust_params_for_zero_source_size_uses_min_window_floor() {
    let mut base = resolve_level_params(CompressionLevel::Level(4), None);
    base.window_log = 22;

    assert_eq!(
        adjust_params_for_source_size(base, 0).window_log,
        MIN_WINDOW_LOG
    );
}
3441
// With a run of identical bytes, the candidate at pos+1 extends further than
// the handed-in best, so the lazy picker should return None (defer emission).
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 16;
    // Hand the picker a deliberately short "best" so the next position wins.
    let best = MatchCandidate {
        start: abs_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
3460
// Depth-2 lazy matching: the fixture is laid out so the baseline repcode
// match at abs_pos is minimal, the +1 candidate is no better, and the +2
// candidate is longer by more than one byte — the picker must defer (None).
// NOTE: the fixture byte positions are load-bearing; see the asserts below
// that pin each candidate before the final pick_lazy_match check.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // Disable table search so only repcode candidates are considered.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // The +1 candidate must not already beat the baseline, otherwise the
    // deferral could be attributed to depth 1 instead of depth 2.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
3497
// Counterpart to the depth-2 deferral test: when the +2 candidate is longer
// by exactly one byte it does NOT clear the "significantly better" bar, so
// the picker must keep and return the current best unchanged.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // Disable table search so only repcode candidates are considered.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    let mut data = alloc::vec![b'x'; 40];
    // Trailing 'Z' (vs 'B' in the sibling test) caps the +2 match at +1 byte.
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
3529
// Re-derives hash_and_row() by hand: a 4-byte little-endian key multiplied
// by the Fibonacci-style prime, keeping the top (row_hash_log + ROW_TAG_BITS)
// bits; the high part selects the row, the low 8 bits are the tag.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Independently recompute the expected row/tag from the raw history.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
    let hash = value.wrapping_mul(PRIME);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    let combined = hash >> (u64::BITS as usize - total_bits);
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
3563
// At abs_pos 12 with rep0 = 3, the candidate source would start at abs 9,
// which is before history_abs_start (10) — so no candidate may be produced.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.history = alloc::vec![b'a'; 20];
    matcher.history_start = 0;
    matcher.history_abs_start = 10;
    matcher.offset_hist = [3, 0, 0];

    assert!(matcher.repcode_candidate(12, 1).is_none());
}
3575
// With only 5 bytes of history, position 4 leaves a single byte to the end —
// too short to form a match, so the repcode probe must return None.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.offset_hist = [1, 0, 0];

    assert!(matcher.repcode_candidate(4, 1).is_none());
}
3587
// row_candidate needs enough trailing bytes to form a hash key / match;
// 5 bytes of history is below that threshold even at position 0.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;

    assert!(matcher.row_candidate(0, 0).is_none());
}
3598
// A 3-byte history cannot produce a hash key, so chain_candidates must yield
// only the usize::MAX "no candidate" sentinel entries.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abc".to_vec();
    hc.history_start = 0;
    hc.history_abs_start = 0;
    hc.ensure_tables();

    let candidates = hc.chain_candidates(0);
    assert!(candidates.iter().all(|&pos| pos == usize::MAX));
}
3610
// reset() must reuse already-allocated hash/chain tables but scrub every
// slot back to HC_EMPTY so stale positions cannot leak across frames.
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());
    // Poison the tables so a lazy reset would be detected.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    assert!(hc.hash_table.iter().all(|&v| v == HC_EMPTY));
    assert!(hc.chain_table.iter().all(|&v| v == HC_EMPTY));
}
3626
// An empty current block must cause start_matching to return without ever
// invoking the sequence callback.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut matcher = HcMatchGenerator::new(32);
    matcher.add_data(Vec::new(), |_| {});

    let mut emitted = false;
    matcher.start_matching(|_| emitted = true);

    assert!(!emitted, "empty current block should not emit sequences");
}
3635
// Once history_start grows past the compaction threshold, compact_history
// must drain the dead prefix and rebase history_start to zero.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut matcher = HcMatchGenerator::new(8);
    matcher.history_start = 16;
    matcher.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();

    matcher.compact_history();

    assert_eq!(matcher.history, b"qrstuvwxyz");
    assert_eq!(matcher.history_start, 0);
}
3645
// With position_base = 1, absolute position 0 has no representable relative
// position, so the insert must be a no-op on both tables.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abcdefghijklmnop".to_vec();
    hc.history_abs_start = 0;
    // position_base ahead of the insert position makes it unrepresentable.
    hc.position_base = 1;
    hc.ensure_tables();
    let before_hash = hc.hash_table.clone();
    let before_chain = hc.chain_table.clone();

    hc.insert_position_no_rebase(0);

    assert_eq!(hc.hash_table, before_hash);
    assert_eq!(hc.chain_table, before_chain);
}
3661
// Simple-backend analogue of the row eviction test: a tiny 8-byte window
// forces the 24 primed dictionary bytes out after two live blocks, at which
// point the budget extension must be fully retired.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Force a tiny window so eviction happens after only two blocks.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3692
// Dfast-backend analogue of the eviction-retirement test: the 24-byte
// dictionary budget must be fully retired once live blocks evict it.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    // Force a tiny window so eviction happens after only two blocks.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3724
// Dictionary-priming scenario for the hash-chain backend (Better): the first
// committed block must find the 8-byte primed dictionary at offset 8.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= HC_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
3759
// Hash-chain analogue of the eviction-retirement test: the 24-byte
// dictionary budget must be fully retired once live blocks evict it.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Force a tiny window so eviction happens after only two blocks.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);

    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3790
// Simulates history that has advanced past u32::MAX absolute positions and
// checks that the rebase triggered via skip_matching() re-anchors
// position_base, repopulates the hash table, and keeps chains searchable.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 32-bit targets usize cannot hold u32::MAX + 64; the scenario is
    // unreachable there, so the test bails out early.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching();
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
3823
// After a u32-boundary rebase, tables must be rebuilt only from positions
// that had already been inserted (history start up to abs_pos); this is
// verified against a second matcher whose tables are built by hand.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 32-bit targets usize cannot hold u32::MAX + 64; bail out early.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Reference matcher: identical data, tables rebuilt manually from only
    // the prefix [history_abs_start, abs_pos).
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
3863
// A capacity-1 SuffixStore is the degenerate case for the keying/masking
// logic; insert and both lookup paths must work without panicking.
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);

    assert_eq!(store.get(b"abcde"), Some(0));
    assert!(store.contains_key(b"abcde"));
}
3871
// Exercises per-level reset parameters: Uncompressed fills every position,
// Fastest uses the interleaved FAST_HASH_FILL_STEP, and Better switches to
// the hash-chain backend with its own window and lazy depth.
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
3889
// After emitting a match at offset 5, the repcode history must rotate so
// rep0 = 5 and the previous entries shift down ([5, 1, 4] from the default).
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                offset: 5,
                match_len: 10,
            }
        );
    }));
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
3911
// With rep0 invalid (99) and rep1 = 10 pointing at the repeated prefix, the
// zero-literal repcode probe must find the rep1 match (offset 10, len 10)
// without falling back to the hash table.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher at the start of the second "abcdefghij" copy.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
3928
// A repcode offset may reach into an *earlier* window entry: the first block
// holds "abcdefghij", the second repeats it, and rep1 = 10 must resolve the
// match across the entry boundary.
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
3949
// With rep0 and rep1 invalid and rep2 = 10, the zero-literal repcode probe
// must fall through to the rep2 slot and still find the offset-10 match.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 4, 10];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
3966
// In the zero-literal case zstd also probes rep0 - 1: with rep0 = 11 the
// derived offset 10 hits the repeated prefix and must be reported.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [11, 4, 99];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
3983
// At suffix_idx 3 in the second entry, offset 14 would reach 11 bytes into
// the previous 10-byte entry — past the searchable prefix — so
// offset_match_len must reject it.
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
    assert_eq!(candidate, None);
}
4003
// skip_matching must seed the suffix store at *every* position, not only
// every hash_fill_step-th one: "bcdef" starts at index 1 of the skipped
// block, which the fast step (3) alone would never register.
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    }));
    assert!(!matcher.next_sequence(|_| {}));
}
4028
// add_suffixes_till must always register the final searchable anchor
// (idx - MIN_MATCH_LEN = position 5) even when the fast fill step would
// otherwise stride past it.
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    let tail = &last.data[5..10];
    assert_eq!(last.suffixes.get(tail), Some(5));
}
4044
// When the target index is below MIN_MATCH_LEN no complete key exists yet,
// so add_suffixes_till must register nothing — not even position 0.
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    let first_key = &last.data[..MIN_MATCH_LEN];
    assert_eq!(last.suffixes.get(first_key), None);
}
4061
// With the fast fill step, add_suffixes_till(17) must register keys at the
// interleaved stride positions 0, 3, 6, 9, 12 (every FAST_HASH_FILL_STEP).
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    for pos in [0usize, 3, 6, 9, 12] {
        let key = &last.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            last.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
4084
// A 16-byte window means the third add_data evicts the first block. The test
// round-trips by re-executing the emitted sequences and checks the output
// reconstructs the input exactly despite the eviction during skip_matching.
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Seed with the skipped second block so offsets resolve during replay.
    let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            // Byte-by-byte copy: matches may overlap their own output.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });

    assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
4114
// Evicted buffers are handed back with 64 bytes of capacity but only 8 bytes
// of data; the add_data eviction callback must see len() == 8.
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");

    let mut observed_evicted_len = None;
    matcher.add_data(second, |data| {
        observed_evicted_len = Some(data.len());
    });

    assert_eq!(
        observed_evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
4137
// Same capacity-vs-length check as the add_data variant, but for the
// explicit trim_to_window path after shrinking max_window_size.
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");
    matcher.add_data(second, |_| {});

    // Shrink the window so the first 8-byte block must be trimmed.
    matcher.max_window_size = 8;

    let mut observed_evicted_len = None;
    matcher.trim_to_window(|data| {
        observed_evicted_len = Some(data.len());
    });

    assert_eq!(
        observed_evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
4163
// After a block is processed, dfast must also insert its tail positions into
// the hash table so the NEXT block can match starting at its first byte:
// "bcdea" at the tail of block 1 anchors a zero-literal offset-5 match that
// spans all 11 bytes of block 2.
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Replay the overlapping match byte-by-byte to extend history.
                let start = history.len() - offset;
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}