1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
11use core::arch::aarch64::{uint8x16_t, vceqq_u8, vgetq_lane_u64, vld1q_u8, vreinterpretq_u64_u8};
12#[cfg(target_arch = "x86")]
13use core::arch::x86::{
14 __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
15 _mm256_loadu_si256, _mm256_movemask_epi8,
16};
17#[cfg(target_arch = "x86_64")]
18use core::arch::x86_64::{
19 __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
20 _mm256_loadu_si256, _mm256_movemask_epi8,
21};
22use core::convert::TryInto;
23use core::num::NonZeroUsize;
24
25use super::CompressionLevel;
26use super::Matcher;
27use super::Sequence;
28use super::blocks::encode_offset_with_history;
29#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
30use std::arch::is_aarch64_feature_detected;
31#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
32use std::arch::is_x86_feature_detected;
33#[cfg(feature = "std")]
34use std::sync::OnceLock;
35
36const MIN_MATCH_LEN: usize = 5;
37const FAST_HASH_FILL_STEP: usize = 3;
38const DFAST_MIN_MATCH_LEN: usize = 6;
39const ROW_MIN_MATCH_LEN: usize = 6;
40const DFAST_TARGET_LEN: usize = 48;
41const DFAST_HASH_BITS: usize = 20;
44const DFAST_SEARCH_DEPTH: usize = 4;
45const DFAST_EMPTY_SLOT: usize = usize::MAX;
46const ROW_HASH_BITS: usize = 20;
47const ROW_LOG: usize = 5;
48const ROW_SEARCH_DEPTH: usize = 16;
49const ROW_TARGET_LEN: usize = 48;
50const ROW_TAG_BITS: usize = 8;
51const ROW_EMPTY_SLOT: usize = usize::MAX;
52const ROW_HASH_KEY_LEN: usize = 4;
53
54const HC_HASH_LOG: usize = 20;
55const HC_CHAIN_LOG: usize = 19;
56const HC_SEARCH_DEPTH: usize = 16;
57const HC_MIN_MATCH_LEN: usize = 5;
58const HC_TARGET_LEN: usize = 48;
59const HC_EMPTY: u32 = 0;
62
63const MAX_HC_SEARCH_DEPTH: usize = 32;
66
67#[derive(Copy, Clone, Debug, Eq, PartialEq)]
68enum PrefixKernel {
69 Scalar,
70 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
71 X86Sse2,
72 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
73 X86Avx2,
74 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
75 Aarch64Neon,
76}
77
78#[derive(Copy, Clone)]
81struct HcConfig {
82 hash_log: usize,
83 chain_log: usize,
84 search_depth: usize,
85 target_len: usize,
86}
87
88#[derive(Copy, Clone)]
89struct RowConfig {
90 hash_bits: usize,
91 row_log: usize,
92 search_depth: usize,
93 target_len: usize,
94}
95
96const HC_CONFIG: HcConfig = HcConfig {
97 hash_log: HC_HASH_LOG,
98 chain_log: HC_CHAIN_LOG,
99 search_depth: HC_SEARCH_DEPTH,
100 target_len: HC_TARGET_LEN,
101};
102
103const BEST_HC_CONFIG: HcConfig = HcConfig {
105 hash_log: 21,
106 chain_log: 20,
107 search_depth: 32,
108 target_len: 128,
109};
110
111const ROW_CONFIG: RowConfig = RowConfig {
112 hash_bits: ROW_HASH_BITS,
113 row_log: ROW_LOG,
114 search_depth: ROW_SEARCH_DEPTH,
115 target_len: ROW_TARGET_LEN,
116};
117
118#[derive(Copy, Clone)]
120struct LevelParams {
121 backend: MatcherBackend,
122 window_log: u8,
123 hash_fill_step: usize,
124 lazy_depth: u8,
125 hc: HcConfig,
126 row: RowConfig,
127}
128
129fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
130 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
131 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
132}
133
134fn row_hash_bits_for_window(max_window_size: usize) -> usize {
135 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
136 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
137}
138
139#[rustfmt::skip]
148const LEVEL_TABLE: [LevelParams; 22] = [
149 LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
152 LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
153 LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
154 LevelParams { backend: MatcherBackend::Row, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
155 LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
156 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
157 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
158 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
159 LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
160 LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
161 LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
162 LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
163 LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
164 LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
165 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
166 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
167 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
168 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
169 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
170 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
171 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
172 LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
173];
174
175const MIN_WINDOW_LOG: u8 = 10;
177const MIN_HINTED_WINDOW_LOG: u8 = 14;
183
184fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
194 let src_log = if src_size == 0 {
199 MIN_WINDOW_LOG
200 } else {
201 (64 - (src_size - 1).leading_zeros()) as u8 };
203 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
204 if src_log < params.window_log {
205 params.window_log = src_log;
206 }
207 if params.backend == MatcherBackend::HashChain {
211 if (src_log + 2) < params.hc.hash_log as u8 {
212 params.hc.hash_log = (src_log + 2) as usize;
213 }
214 if (src_log + 1) < params.hc.chain_log as u8 {
215 params.hc.chain_log = (src_log + 1) as usize;
216 }
217 } else if params.backend == MatcherBackend::Row {
218 let max_window_size = 1usize << params.window_log;
219 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
220 }
221 params
222}
223
224fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
227 let params = match level {
228 CompressionLevel::Uncompressed => LevelParams {
229 backend: MatcherBackend::Simple,
230 window_log: 17,
231 hash_fill_step: 1,
232 lazy_depth: 0,
233 hc: HC_CONFIG,
234 row: ROW_CONFIG,
235 },
236 CompressionLevel::Fastest => LEVEL_TABLE[0],
237 CompressionLevel::Default => LEVEL_TABLE[2],
238 CompressionLevel::Better => LEVEL_TABLE[6],
239 CompressionLevel::Best => LEVEL_TABLE[10],
240 CompressionLevel::Level(n) => {
241 if n > 0 {
242 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
243 LEVEL_TABLE[idx]
244 } else if n == 0 {
245 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
247 } else {
248 let acceleration =
252 (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
253 let step = (acceleration + 3).min(128);
254 LevelParams {
255 backend: MatcherBackend::Simple,
256 window_log: 17,
257 hash_fill_step: step,
258 lazy_depth: 0,
259 hc: HC_CONFIG,
260 row: ROW_CONFIG,
261 }
262 }
263 }
264 };
265 if let Some(size) = source_size {
266 adjust_params_for_source_size(params, size)
267 } else {
268 params
269 }
270}
271
272#[derive(Copy, Clone, Debug, PartialEq, Eq)]
273enum MatcherBackend {
274 Simple,
275 Dfast,
276 Row,
277 HashChain,
278}
279
280pub struct MatchGeneratorDriver {
282 vec_pool: Vec<Vec<u8>>,
283 suffix_pool: Vec<SuffixStore>,
284 match_generator: MatchGenerator,
285 dfast_match_generator: Option<DfastMatchGenerator>,
286 row_match_generator: Option<RowMatchGenerator>,
287 hc_match_generator: Option<HcMatchGenerator>,
288 active_backend: MatcherBackend,
289 slice_size: usize,
290 base_slice_size: usize,
291 reported_window_size: usize,
294 dictionary_retained_budget: usize,
297 source_size_hint: Option<u64>,
299}
300
301impl MatchGeneratorDriver {
302 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
307 let max_window_size = max_slices_in_window * slice_size;
308 Self {
309 vec_pool: Vec::new(),
310 suffix_pool: Vec::new(),
311 match_generator: MatchGenerator::new(max_window_size),
312 dfast_match_generator: None,
313 row_match_generator: None,
314 hc_match_generator: None,
315 active_backend: MatcherBackend::Simple,
316 slice_size,
317 base_slice_size: slice_size,
318 reported_window_size: max_window_size,
319 dictionary_retained_budget: 0,
320 source_size_hint: None,
321 }
322 }
323
324 fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
325 resolve_level_params(level, source_size)
326 }
327
328 fn dfast_matcher(&self) -> &DfastMatchGenerator {
329 self.dfast_match_generator
330 .as_ref()
331 .expect("dfast backend must be initialized by reset() before use")
332 }
333
334 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
335 self.dfast_match_generator
336 .as_mut()
337 .expect("dfast backend must be initialized by reset() before use")
338 }
339
340 fn row_matcher(&self) -> &RowMatchGenerator {
341 self.row_match_generator
342 .as_ref()
343 .expect("row backend must be initialized by reset() before use")
344 }
345
346 fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
347 self.row_match_generator
348 .as_mut()
349 .expect("row backend must be initialized by reset() before use")
350 }
351
352 fn hc_matcher(&self) -> &HcMatchGenerator {
353 self.hc_match_generator
354 .as_ref()
355 .expect("hash chain backend must be initialized by reset() before use")
356 }
357
358 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
359 self.hc_match_generator
360 .as_mut()
361 .expect("hash chain backend must be initialized by reset() before use")
362 }
363
364 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
365 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
366 if reclaimed == 0 {
367 return;
368 }
369 self.dictionary_retained_budget -= reclaimed;
370 match self.active_backend {
371 MatcherBackend::Simple => {
372 self.match_generator.max_window_size = self
373 .match_generator
374 .max_window_size
375 .saturating_sub(reclaimed);
376 }
377 MatcherBackend::Dfast => {
378 let matcher = self.dfast_matcher_mut();
379 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
380 }
381 MatcherBackend::Row => {
382 let matcher = self.row_matcher_mut();
383 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
384 }
385 MatcherBackend::HashChain => {
386 let matcher = self.hc_matcher_mut();
387 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
388 }
389 }
390 }
391
392 fn trim_after_budget_retire(&mut self) {
393 loop {
394 let mut evicted_bytes = 0usize;
395 match self.active_backend {
396 MatcherBackend::Simple => {
397 let vec_pool = &mut self.vec_pool;
398 let suffix_pool = &mut self.suffix_pool;
399 self.match_generator.reserve(0, |mut data, mut suffixes| {
400 evicted_bytes += data.len();
401 data.resize(data.capacity(), 0);
402 vec_pool.push(data);
403 suffixes.slots.clear();
404 suffixes.slots.resize(suffixes.slots.capacity(), None);
405 suffix_pool.push(suffixes);
406 });
407 }
408 MatcherBackend::Dfast => {
409 let mut retired = Vec::new();
410 self.dfast_matcher_mut().trim_to_window(|data| {
411 evicted_bytes += data.len();
412 retired.push(data);
413 });
414 for mut data in retired {
415 data.resize(data.capacity(), 0);
416 self.vec_pool.push(data);
417 }
418 }
419 MatcherBackend::Row => {
420 let mut retired = Vec::new();
421 self.row_matcher_mut().trim_to_window(|data| {
422 evicted_bytes += data.len();
423 retired.push(data);
424 });
425 for mut data in retired {
426 data.resize(data.capacity(), 0);
427 self.vec_pool.push(data);
428 }
429 }
430 MatcherBackend::HashChain => {
431 let mut retired = Vec::new();
432 self.hc_matcher_mut().trim_to_window(|data| {
433 evicted_bytes += data.len();
434 retired.push(data);
435 });
436 for mut data in retired {
437 data.resize(data.capacity(), 0);
438 self.vec_pool.push(data);
439 }
440 }
441 }
442 if evicted_bytes == 0 {
443 break;
444 }
445 self.retire_dictionary_budget(evicted_bytes);
446 }
447 }
448}
449
450impl Matcher for MatchGeneratorDriver {
451 fn supports_dictionary_priming(&self) -> bool {
452 true
453 }
454
455 fn set_source_size_hint(&mut self, size: u64) {
456 self.source_size_hint = Some(size);
457 }
458
459 fn reset(&mut self, level: CompressionLevel) {
460 let hint = self.source_size_hint.take();
461 let hinted = hint.is_some();
462 let params = Self::level_params(level, hint);
463 let max_window_size = 1usize << params.window_log;
464 self.dictionary_retained_budget = 0;
465 if self.active_backend != params.backend {
466 match self.active_backend {
467 MatcherBackend::Simple => {
468 let vec_pool = &mut self.vec_pool;
469 let suffix_pool = &mut self.suffix_pool;
470 self.match_generator.reset(|mut data, mut suffixes| {
471 data.resize(data.capacity(), 0);
472 vec_pool.push(data);
473 suffixes.slots.clear();
474 suffixes.slots.resize(suffixes.slots.capacity(), None);
475 suffix_pool.push(suffixes);
476 });
477 }
478 MatcherBackend::Dfast => {
479 if let Some(dfast) = self.dfast_match_generator.as_mut() {
480 let vec_pool = &mut self.vec_pool;
481 dfast.reset(|mut data| {
482 data.resize(data.capacity(), 0);
483 vec_pool.push(data);
484 });
485 }
486 }
487 MatcherBackend::Row => {
488 if let Some(row) = self.row_match_generator.as_mut() {
489 row.row_heads = Vec::new();
490 row.row_positions = Vec::new();
491 row.row_tags = Vec::new();
492 let vec_pool = &mut self.vec_pool;
493 row.reset(|mut data| {
494 data.resize(data.capacity(), 0);
495 vec_pool.push(data);
496 });
497 }
498 }
499 MatcherBackend::HashChain => {
500 if let Some(hc) = self.hc_match_generator.as_mut() {
501 hc.hash_table = Vec::new();
504 hc.chain_table = Vec::new();
505 let vec_pool = &mut self.vec_pool;
506 hc.reset(|mut data| {
507 data.resize(data.capacity(), 0);
508 vec_pool.push(data);
509 });
510 }
511 }
512 }
513 }
514
515 self.active_backend = params.backend;
516 self.slice_size = self.base_slice_size.min(max_window_size);
517 self.reported_window_size = max_window_size;
518 match self.active_backend {
519 MatcherBackend::Simple => {
520 let vec_pool = &mut self.vec_pool;
521 let suffix_pool = &mut self.suffix_pool;
522 self.match_generator.max_window_size = max_window_size;
523 self.match_generator.hash_fill_step = params.hash_fill_step;
524 self.match_generator.reset(|mut data, mut suffixes| {
525 data.resize(data.capacity(), 0);
526 vec_pool.push(data);
527 suffixes.slots.clear();
528 suffixes.slots.resize(suffixes.slots.capacity(), None);
529 suffix_pool.push(suffixes);
530 });
531 }
532 MatcherBackend::Dfast => {
533 let dfast = self
534 .dfast_match_generator
535 .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
536 dfast.max_window_size = max_window_size;
537 dfast.lazy_depth = params.lazy_depth;
538 dfast.set_hash_bits(if hinted {
539 dfast_hash_bits_for_window(max_window_size)
540 } else {
541 DFAST_HASH_BITS
542 });
543 let vec_pool = &mut self.vec_pool;
544 dfast.reset(|mut data| {
545 data.resize(data.capacity(), 0);
546 vec_pool.push(data);
547 });
548 }
549 MatcherBackend::Row => {
550 let row = self
551 .row_match_generator
552 .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
553 row.max_window_size = max_window_size;
554 row.lazy_depth = params.lazy_depth;
555 row.configure(params.row);
556 if hinted {
557 row.set_hash_bits(row_hash_bits_for_window(max_window_size));
558 }
559 let vec_pool = &mut self.vec_pool;
560 row.reset(|mut data| {
561 data.resize(data.capacity(), 0);
562 vec_pool.push(data);
563 });
564 }
565 MatcherBackend::HashChain => {
566 let hc = self
567 .hc_match_generator
568 .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
569 hc.max_window_size = max_window_size;
570 hc.lazy_depth = params.lazy_depth;
571 hc.configure(params.hc);
572 let vec_pool = &mut self.vec_pool;
573 hc.reset(|mut data| {
574 data.resize(data.capacity(), 0);
575 vec_pool.push(data);
576 });
577 }
578 }
579 }
580
581 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
582 match self.active_backend {
583 MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
584 MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
585 MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
586 MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
587 }
588
589 if dict_content.is_empty() {
590 return;
591 }
592
593 let retained_dict_budget = dict_content.len();
596 match self.active_backend {
597 MatcherBackend::Simple => {
598 self.match_generator.max_window_size = self
599 .match_generator
600 .max_window_size
601 .saturating_add(retained_dict_budget);
602 }
603 MatcherBackend::Dfast => {
604 let matcher = self.dfast_matcher_mut();
605 matcher.max_window_size =
606 matcher.max_window_size.saturating_add(retained_dict_budget);
607 }
608 MatcherBackend::Row => {
609 let matcher = self.row_matcher_mut();
610 matcher.max_window_size =
611 matcher.max_window_size.saturating_add(retained_dict_budget);
612 }
613 MatcherBackend::HashChain => {
614 let matcher = self.hc_matcher_mut();
615 matcher.max_window_size =
616 matcher.max_window_size.saturating_add(retained_dict_budget);
617 }
618 }
619
620 let mut start = 0usize;
621 let mut committed_dict_budget = 0usize;
622 let min_primed_tail = match self.active_backend {
626 MatcherBackend::Simple => MIN_MATCH_LEN,
627 MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
628 };
629 while start < dict_content.len() {
630 let end = (start + self.slice_size).min(dict_content.len());
631 if end - start < min_primed_tail {
632 break;
633 }
634 let mut space = self.get_next_space();
635 space.clear();
636 space.extend_from_slice(&dict_content[start..end]);
637 self.commit_space(space);
638 self.skip_matching();
639 committed_dict_budget += end - start;
640 start = end;
641 }
642
643 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
644 if uncommitted_tail_budget > 0 {
645 match self.active_backend {
646 MatcherBackend::Simple => {
647 self.match_generator.max_window_size = self
648 .match_generator
649 .max_window_size
650 .saturating_sub(uncommitted_tail_budget);
651 }
652 MatcherBackend::Dfast => {
653 let matcher = self.dfast_matcher_mut();
654 matcher.max_window_size = matcher
655 .max_window_size
656 .saturating_sub(uncommitted_tail_budget);
657 }
658 MatcherBackend::Row => {
659 let matcher = self.row_matcher_mut();
660 matcher.max_window_size = matcher
661 .max_window_size
662 .saturating_sub(uncommitted_tail_budget);
663 }
664 MatcherBackend::HashChain => {
665 let matcher = self.hc_matcher_mut();
666 matcher.max_window_size = matcher
667 .max_window_size
668 .saturating_sub(uncommitted_tail_budget);
669 }
670 }
671 }
672 if committed_dict_budget > 0 {
673 self.dictionary_retained_budget = self
674 .dictionary_retained_budget
675 .saturating_add(committed_dict_budget);
676 }
677 }
678
679 fn window_size(&self) -> u64 {
680 self.reported_window_size as u64
681 }
682
683 fn get_next_space(&mut self) -> Vec<u8> {
684 if let Some(mut space) = self.vec_pool.pop() {
685 if space.len() > self.slice_size {
686 space.truncate(self.slice_size);
687 }
688 if space.len() < self.slice_size {
689 space.resize(self.slice_size, 0);
690 }
691 return space;
692 }
693 alloc::vec![0; self.slice_size]
694 }
695
696 fn get_last_space(&mut self) -> &[u8] {
697 match self.active_backend {
698 MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
699 MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
700 MatcherBackend::Row => self.row_matcher().get_last_space(),
701 MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
702 }
703 }
704
705 fn commit_space(&mut self, space: Vec<u8>) {
706 match self.active_backend {
707 MatcherBackend::Simple => {
708 let vec_pool = &mut self.vec_pool;
709 let mut evicted_bytes = 0usize;
710 let suffixes = match self.suffix_pool.pop() {
711 Some(store) if store.slots.len() >= space.len() => store,
712 _ => SuffixStore::with_capacity(space.len()),
713 };
714 let suffix_pool = &mut self.suffix_pool;
715 self.match_generator
716 .add_data(space, suffixes, |mut data, mut suffixes| {
717 evicted_bytes += data.len();
718 data.resize(data.capacity(), 0);
719 vec_pool.push(data);
720 suffixes.slots.clear();
721 suffixes.slots.resize(suffixes.slots.capacity(), None);
722 suffix_pool.push(suffixes);
723 });
724 self.retire_dictionary_budget(evicted_bytes);
725 self.trim_after_budget_retire();
726 }
727 MatcherBackend::Dfast => {
728 let vec_pool = &mut self.vec_pool;
729 let mut evicted_bytes = 0usize;
730 self.dfast_match_generator
731 .as_mut()
732 .expect("dfast backend must be initialized by reset() before use")
733 .add_data(space, |mut data| {
734 evicted_bytes += data.len();
735 data.resize(data.capacity(), 0);
736 vec_pool.push(data);
737 });
738 self.retire_dictionary_budget(evicted_bytes);
739 self.trim_after_budget_retire();
740 }
741 MatcherBackend::Row => {
742 let vec_pool = &mut self.vec_pool;
743 let mut evicted_bytes = 0usize;
744 self.row_match_generator
745 .as_mut()
746 .expect("row backend must be initialized by reset() before use")
747 .add_data(space, |mut data| {
748 evicted_bytes += data.len();
749 data.resize(data.capacity(), 0);
750 vec_pool.push(data);
751 });
752 self.retire_dictionary_budget(evicted_bytes);
753 self.trim_after_budget_retire();
754 }
755 MatcherBackend::HashChain => {
756 let vec_pool = &mut self.vec_pool;
757 let mut evicted_bytes = 0usize;
758 self.hc_match_generator
759 .as_mut()
760 .expect("hash chain backend must be initialized by reset() before use")
761 .add_data(space, |mut data| {
762 evicted_bytes += data.len();
763 data.resize(data.capacity(), 0);
764 vec_pool.push(data);
765 });
766 self.retire_dictionary_budget(evicted_bytes);
767 self.trim_after_budget_retire();
768 }
769 }
770 }
771
772 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
773 match self.active_backend {
774 MatcherBackend::Simple => {
775 while self.match_generator.next_sequence(&mut handle_sequence) {}
776 }
777 MatcherBackend::Dfast => self
778 .dfast_matcher_mut()
779 .start_matching(&mut handle_sequence),
780 MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
781 MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
782 }
783 }
784 fn skip_matching(&mut self) {
785 match self.active_backend {
786 MatcherBackend::Simple => self.match_generator.skip_matching(),
787 MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
788 MatcherBackend::Row => self.row_matcher_mut().skip_matching(),
789 MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
790 }
791 }
792}
793
794struct SuffixStore {
797 slots: Vec<Option<NonZeroUsize>>,
801 len_log: u32,
802}
803
804impl SuffixStore {
805 fn with_capacity(capacity: usize) -> Self {
806 Self {
807 slots: alloc::vec![None; capacity],
808 len_log: capacity.ilog2(),
809 }
810 }
811
812 #[inline(always)]
813 fn insert(&mut self, suffix: &[u8], idx: usize) {
814 let key = self.key(suffix);
815 self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
816 }
817
818 #[inline(always)]
819 fn contains_key(&self, suffix: &[u8]) -> bool {
820 let key = self.key(suffix);
821 self.slots[key].is_some()
822 }
823
824 #[inline(always)]
825 fn get(&self, suffix: &[u8]) -> Option<usize> {
826 let key = self.key(suffix);
827 self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
828 }
829
830 #[inline(always)]
831 fn key(&self, suffix: &[u8]) -> usize {
832 if self.len_log == 0 {
834 return 0;
835 }
836
837 let s0 = suffix[0] as u64;
838 let s1 = suffix[1] as u64;
839 let s2 = suffix[2] as u64;
840 let s3 = suffix[3] as u64;
841 let s4 = suffix[4] as u64;
842
843 const POLY: u64 = 0xCF3BCCDCABu64;
844
845 let s0 = (s0 << 24).wrapping_mul(POLY);
846 let s1 = (s1 << 32).wrapping_mul(POLY);
847 let s2 = (s2 << 40).wrapping_mul(POLY);
848 let s3 = (s3 << 48).wrapping_mul(POLY);
849 let s4 = (s4 << 56).wrapping_mul(POLY);
850
851 let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
852 let index = index >> (64 - self.len_log);
853 index as usize % self.slots.len()
854 }
855}
856
857struct WindowEntry {
860 data: Vec<u8>,
861 suffixes: SuffixStore,
863 base_offset: usize,
865}
866
867pub(crate) struct MatchGenerator {
868 max_window_size: usize,
869 window: Vec<WindowEntry>,
872 window_size: usize,
873 #[cfg(debug_assertions)]
874 concat_window: Vec<u8>,
875 suffix_idx: usize,
877 last_idx_in_sequence: usize,
879 hash_fill_step: usize,
880 offset_hist: [u32; 3],
881}
882
883impl MatchGenerator {
884 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
885 #[inline(always)]
886 const fn select_x86_prefix_kernel(has_avx2: bool, has_sse2: bool) -> PrefixKernel {
887 if has_avx2 {
888 return PrefixKernel::X86Avx2;
889 }
890 if has_sse2 {
891 return PrefixKernel::X86Sse2;
892 }
893 PrefixKernel::Scalar
894 }
895
896 #[cfg(feature = "std")]
897 #[inline(always)]
898 fn detect_prefix_kernel() -> PrefixKernel {
899 static KERNEL: OnceLock<PrefixKernel> = OnceLock::new();
900 *KERNEL.get_or_init(|| {
901 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
902 {
903 let kernel = Self::select_x86_prefix_kernel(
904 is_x86_feature_detected!("avx2"),
905 is_x86_feature_detected!("sse2"),
906 );
907 if kernel != PrefixKernel::Scalar {
908 return kernel;
909 }
910 }
911 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
912 {
913 if is_aarch64_feature_detected!("neon") {
914 return PrefixKernel::Aarch64Neon;
915 }
916 }
917 PrefixKernel::Scalar
918 })
919 }
920
921 #[cfg(not(feature = "std"))]
922 #[inline(always)]
923 fn detect_prefix_kernel() -> PrefixKernel {
924 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
925 {
926 let kernel = Self::select_x86_prefix_kernel(
927 cfg!(target_feature = "avx2"),
928 cfg!(target_feature = "sse2"),
929 );
930 if kernel != PrefixKernel::Scalar {
931 return kernel;
932 }
933 }
934 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
935 {
936 if cfg!(target_feature = "neon") {
937 return PrefixKernel::Aarch64Neon;
938 }
939 }
940 PrefixKernel::Scalar
941 }
942
943 #[inline(always)]
944 #[cfg(target_endian = "little")]
945 fn mismatch_byte_index(diff: usize) -> usize {
946 diff.trailing_zeros() as usize / 8
947 }
948
949 #[inline(always)]
950 #[cfg(target_endian = "big")]
951 fn mismatch_byte_index(diff: usize) -> usize {
952 diff.leading_zeros() as usize / 8
953 }
954
955 fn new(max_size: usize) -> Self {
957 Self {
958 max_window_size: max_size,
959 window: Vec::new(),
960 window_size: 0,
961 #[cfg(debug_assertions)]
962 concat_window: Vec::new(),
963 suffix_idx: 0,
964 last_idx_in_sequence: 0,
965 hash_fill_step: 1,
966 offset_hist: [1, 4, 8],
967 }
968 }
969
970 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
971 self.window_size = 0;
972 #[cfg(debug_assertions)]
973 self.concat_window.clear();
974 self.suffix_idx = 0;
975 self.last_idx_in_sequence = 0;
976 self.offset_hist = [1, 4, 8];
977 self.window.drain(..).for_each(|entry| {
978 reuse_space(entry.data, entry.suffixes);
979 });
980 }
981
982 fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
987 loop {
988 let last_entry = self.window.last().unwrap();
989 let data_slice = &last_entry.data;
990
991 if self.suffix_idx >= data_slice.len() {
993 if self.last_idx_in_sequence != self.suffix_idx {
994 let literals = &data_slice[self.last_idx_in_sequence..];
995 self.last_idx_in_sequence = self.suffix_idx;
996 handle_sequence(Sequence::Literals { literals });
997 return true;
998 } else {
999 return false;
1000 }
1001 }
1002
1003 let data_slice = &data_slice[self.suffix_idx..];
1005 if data_slice.len() < MIN_MATCH_LEN {
1006 let last_idx_in_sequence = self.last_idx_in_sequence;
1007 self.last_idx_in_sequence = last_entry.data.len();
1008 self.suffix_idx = last_entry.data.len();
1009 handle_sequence(Sequence::Literals {
1010 literals: &last_entry.data[last_idx_in_sequence..],
1011 });
1012 return true;
1013 }
1014
1015 let key = &data_slice[..MIN_MATCH_LEN];
1017 let literals_len = self.suffix_idx - self.last_idx_in_sequence;
1018
1019 let mut candidate = self.repcode_candidate(data_slice, literals_len);
1021 for match_entry in self.window.iter() {
1022 if let Some(match_index) = match_entry.suffixes.get(key) {
1023 let match_slice = &match_entry.data[match_index..];
1024
1025 let match_len = Self::common_prefix_len(match_slice, data_slice);
1027
1028 if match_len >= MIN_MATCH_LEN {
1030 let offset = match_entry.base_offset + self.suffix_idx - match_index;
1031
1032 #[cfg(debug_assertions)]
1034 {
1035 let unprocessed = last_entry.data.len() - self.suffix_idx;
1036 let start = self.concat_window.len() - unprocessed - offset;
1037 let end = start + match_len;
1038 let check_slice = &self.concat_window[start..end];
1039 debug_assert_eq!(check_slice, &match_slice[..match_len]);
1040 }
1041
1042 if let Some((old_offset, old_match_len)) = candidate {
1043 if match_len > old_match_len
1044 || (match_len == old_match_len && offset < old_offset)
1045 {
1046 candidate = Some((offset, match_len));
1047 }
1048 } else {
1049 candidate = Some((offset, match_len));
1050 }
1051 }
1052 }
1053 }
1054
1055 if let Some((offset, match_len)) = candidate {
1056 self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);
1059
1060 let last_entry = self.window.last().unwrap();
1062 let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
1063
1064 self.suffix_idx += match_len;
1066 self.last_idx_in_sequence = self.suffix_idx;
1067 let _ = encode_offset_with_history(
1068 offset as u32,
1069 literals.len() as u32,
1070 &mut self.offset_hist,
1071 );
1072 handle_sequence(Sequence::Triple {
1073 literals,
1074 offset,
1075 match_len,
1076 });
1077
1078 return true;
1079 }
1080
1081 let last_entry = self.window.last_mut().unwrap();
1082 let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
1083 if !last_entry.suffixes.contains_key(key) {
1084 last_entry.suffixes.insert(key, self.suffix_idx);
1085 }
1086 self.suffix_idx += 1;
1087 }
1088 }
1089
1090 #[inline(always)]
1092 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
1093 let max = a.len().min(b.len());
1094 let mut off = 0usize;
1095 let lhs = a.as_ptr();
1096 let rhs = b.as_ptr();
1097
1098 match Self::detect_prefix_kernel() {
1099 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1100 PrefixKernel::X86Avx2 => {
1101 off = unsafe { Self::prefix_len_simd_avx2(lhs, rhs, max) };
1102 }
1103 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1104 PrefixKernel::X86Sse2 => {
1105 off = unsafe { Self::prefix_len_simd_sse2(lhs, rhs, max) };
1106 }
1107 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
1108 PrefixKernel::Aarch64Neon => {
1109 off = unsafe { Self::prefix_len_simd_neon(lhs, rhs, max) };
1110 }
1111 PrefixKernel::Scalar => {}
1112 }
1113
1114 Self::common_prefix_len_scalar(a, b, off, max)
1115 }
1116
1117 #[inline(always)]
1118 fn common_prefix_len_scalar(a: &[u8], b: &[u8], mut off: usize, max: usize) -> usize {
1119 let chunk = core::mem::size_of::<usize>();
1120 let lhs = a.as_ptr();
1121 let rhs = b.as_ptr();
1122 while off + chunk <= max {
1123 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
1124 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
1125 let diff = lhs_word ^ rhs_word;
1126 if diff != 0 {
1127 return off + Self::mismatch_byte_index(diff);
1128 }
1129 off += chunk;
1130 }
1131 off + core::iter::zip(&a[off..max], &b[off..max])
1132 .take_while(|(x, y)| x == y)
1133 .count()
1134 }
1135
1136 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1137 #[target_feature(enable = "sse2")]
1138 unsafe fn prefix_len_simd_sse2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
1139 let mut off = 0usize;
1140 while off + 16 <= max {
1141 let a: __m128i = unsafe { _mm_loadu_si128(lhs.add(off).cast::<__m128i>()) };
1142 let b: __m128i = unsafe { _mm_loadu_si128(rhs.add(off).cast::<__m128i>()) };
1143 let eq = _mm_cmpeq_epi8(a, b);
1144 let mask = _mm_movemask_epi8(eq) as u32;
1145 if mask != 0xFFFF {
1146 return off + (!mask).trailing_zeros() as usize;
1147 }
1148 off += 16;
1149 }
1150 off
1151 }
1152
1153 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1154 #[target_feature(enable = "avx2")]
1155 unsafe fn prefix_len_simd_avx2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
1156 let mut off = 0usize;
1157 while off + 32 <= max {
1158 let a: __m256i = unsafe { _mm256_loadu_si256(lhs.add(off).cast::<__m256i>()) };
1159 let b: __m256i = unsafe { _mm256_loadu_si256(rhs.add(off).cast::<__m256i>()) };
1160 let eq = _mm256_cmpeq_epi8(a, b);
1161 let mask = _mm256_movemask_epi8(eq) as u32;
1162 if mask != u32::MAX {
1163 return off + (!mask).trailing_zeros() as usize;
1164 }
1165 off += 32;
1166 }
1167 off
1168 }
1169
1170 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
1171 #[target_feature(enable = "neon")]
1172 unsafe fn prefix_len_simd_neon(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
1173 let mut off = 0usize;
1174 while off + 16 <= max {
1175 let a: uint8x16_t = unsafe { vld1q_u8(lhs.add(off)) };
1176 let b: uint8x16_t = unsafe { vld1q_u8(rhs.add(off)) };
1177 let eq = vceqq_u8(a, b);
1178 let lanes = vreinterpretq_u64_u8(eq);
1179 let low = vgetq_lane_u64(lanes, 0);
1180 if low != u64::MAX {
1181 let diff = low ^ u64::MAX;
1182 return off + Self::mismatch_byte_index(diff as usize);
1183 }
1184 let high = vgetq_lane_u64(lanes, 1);
1185 if high != u64::MAX {
1186 let diff = high ^ u64::MAX;
1187 return off + 8 + Self::mismatch_byte_index(diff as usize);
1188 }
1189 off += 16;
1190 }
1191 off
1192 }
1193
1194 #[inline(always)]
1196 fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
1197 let start = self.suffix_idx;
1198 let last_entry = self.window.last_mut().unwrap();
1199 if last_entry.data.len() < MIN_MATCH_LEN {
1200 return;
1201 }
1202 let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
1203 if insert_limit > start {
1204 let data = last_entry.data.as_slice();
1205 let suffixes = &mut last_entry.suffixes;
1206 if fill_step == FAST_HASH_FILL_STEP {
1207 Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
1208 } else {
1209 let mut pos = start;
1210 while pos < insert_limit {
1211 Self::insert_suffix_if_absent(data, suffixes, pos);
1212 pos += fill_step;
1213 }
1214 }
1215 }
1216
1217 if idx >= start + MIN_MATCH_LEN {
1218 let tail_start = idx - MIN_MATCH_LEN;
1219 let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
1220 if !last_entry.suffixes.contains_key(tail_key) {
1221 last_entry.suffixes.insert(tail_key, tail_start);
1222 }
1223 }
1224 }
1225
1226 #[inline(always)]
1227 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1228 debug_assert!(
1229 pos + MIN_MATCH_LEN <= data.len(),
1230 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1231 pos,
1232 MIN_MATCH_LEN,
1233 data.len()
1234 );
1235 let key = &data[pos..pos + MIN_MATCH_LEN];
1236 if !suffixes.contains_key(key) {
1237 suffixes.insert(key, pos);
1238 }
1239 }
1240
1241 #[inline(always)]
1242 fn add_suffixes_interleaved_fast(
1243 data: &[u8],
1244 suffixes: &mut SuffixStore,
1245 start: usize,
1246 insert_limit: usize,
1247 ) {
1248 let lane = FAST_HASH_FILL_STEP;
1249 let mut pos = start;
1250
1251 while pos + lane * 3 < insert_limit {
1254 let p0 = pos;
1255 let p1 = pos + lane;
1256 let p2 = pos + lane * 2;
1257 let p3 = pos + lane * 3;
1258
1259 Self::insert_suffix_if_absent(data, suffixes, p0);
1260 Self::insert_suffix_if_absent(data, suffixes, p1);
1261 Self::insert_suffix_if_absent(data, suffixes, p2);
1262 Self::insert_suffix_if_absent(data, suffixes, p3);
1263
1264 pos += lane * 4;
1265 }
1266
1267 while pos < insert_limit {
1268 Self::insert_suffix_if_absent(data, suffixes, pos);
1269 pos += lane;
1270 }
1271 }
1272
1273 fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
1274 if literals_len != 0 {
1275 return None;
1276 }
1277
1278 let reps = [
1279 Some(self.offset_hist[1] as usize),
1280 Some(self.offset_hist[2] as usize),
1281 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1282 ];
1283
1284 let mut best: Option<(usize, usize)> = None;
1285 let mut seen = [0usize; 3];
1286 let mut seen_len = 0usize;
1287 for offset in reps.into_iter().flatten() {
1288 if offset == 0 {
1289 continue;
1290 }
1291 if seen[..seen_len].contains(&offset) {
1292 continue;
1293 }
1294 seen[seen_len] = offset;
1295 seen_len += 1;
1296
1297 let Some(match_len) = self.offset_match_len(offset, data_slice) else {
1298 continue;
1299 };
1300 if match_len < MIN_MATCH_LEN {
1301 continue;
1302 }
1303
1304 if best.is_none_or(|(old_offset, old_len)| {
1305 match_len > old_len || (match_len == old_len && offset < old_offset)
1306 }) {
1307 best = Some((offset, match_len));
1308 }
1309 }
1310 best
1311 }
1312
1313 fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
1314 if offset == 0 {
1315 return None;
1316 }
1317
1318 let last_idx = self.window.len().checked_sub(1)?;
1319 let last_entry = &self.window[last_idx];
1320 let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
1321 if offset > searchable_prefix {
1322 return None;
1323 }
1324
1325 let mut remaining = offset;
1326 let (entry_idx, match_index) = if remaining <= self.suffix_idx {
1327 (last_idx, self.suffix_idx - remaining)
1328 } else {
1329 remaining -= self.suffix_idx;
1330 let mut found = None;
1331 for entry_idx in (0..last_idx).rev() {
1332 let len = self.window[entry_idx].data.len();
1333 if remaining <= len {
1334 found = Some((entry_idx, len - remaining));
1335 break;
1336 }
1337 remaining -= len;
1338 }
1339 found?
1340 };
1341
1342 let match_entry = &self.window[entry_idx];
1343 let match_slice = &match_entry.data[match_index..];
1344
1345 Some(Self::common_prefix_len(match_slice, data_slice))
1346 }
1347
1348 fn skip_matching(&mut self) {
1350 let len = self.window.last().unwrap().data.len();
1351 self.add_suffixes_till(len, 1);
1352 self.suffix_idx = len;
1353 self.last_idx_in_sequence = len;
1354 }
1355
1356 fn add_data(
1359 &mut self,
1360 data: Vec<u8>,
1361 suffixes: SuffixStore,
1362 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
1363 ) {
1364 assert!(
1365 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
1366 );
1367 self.reserve(data.len(), reuse_space);
1368 #[cfg(debug_assertions)]
1369 self.concat_window.extend_from_slice(&data);
1370
1371 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
1372 for entry in self.window.iter_mut() {
1373 entry.base_offset += last_len;
1374 }
1375 }
1376
1377 let len = data.len();
1378 self.window.push(WindowEntry {
1379 data,
1380 suffixes,
1381 base_offset: 0,
1382 });
1383 self.window_size += len;
1384 self.suffix_idx = 0;
1385 self.last_idx_in_sequence = 0;
1386 }
1387
1388 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1391 assert!(self.max_window_size >= amount);
1392 while self.window_size + amount > self.max_window_size {
1393 let removed = self.window.remove(0);
1394 self.window_size -= removed.data.len();
1395 #[cfg(debug_assertions)]
1396 self.concat_window.drain(0..removed.data.len());
1397
1398 let WindowEntry {
1399 suffixes,
1400 data: leaked_vec,
1401 base_offset: _,
1402 } = removed;
1403 reuse_space(leaked_vec, suffixes);
1404 }
1405 }
1406}
1407
1408struct DfastMatchGenerator {
1409 max_window_size: usize,
1410 window: VecDeque<Vec<u8>>,
1411 window_size: usize,
1412 history: Vec<u8>,
1415 history_start: usize,
1416 history_abs_start: usize,
1417 offset_hist: [u32; 3],
1418 short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1419 long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1420 hash_bits: usize,
1421 lazy_depth: u8,
1423}
1424
1425#[derive(Copy, Clone)]
1426struct MatchCandidate {
1427 start: usize,
1428 offset: usize,
1429 match_len: usize,
1430}
1431
1432fn best_len_offset_candidate(
1433 lhs: Option<MatchCandidate>,
1434 rhs: Option<MatchCandidate>,
1435) -> Option<MatchCandidate> {
1436 match (lhs, rhs) {
1437 (None, other) | (other, None) => other,
1438 (Some(lhs), Some(rhs)) => {
1439 if rhs.match_len > lhs.match_len
1440 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1441 {
1442 Some(rhs)
1443 } else {
1444 Some(lhs)
1445 }
1446 }
1447 }
1448}
1449
1450fn extend_backwards_shared(
1451 concat: &[u8],
1452 history_abs_start: usize,
1453 mut candidate_pos: usize,
1454 mut abs_pos: usize,
1455 mut match_len: usize,
1456 lit_len: usize,
1457) -> MatchCandidate {
1458 let min_abs_pos = abs_pos - lit_len;
1459 while abs_pos > min_abs_pos
1460 && candidate_pos > history_abs_start
1461 && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1462 {
1463 candidate_pos -= 1;
1464 abs_pos -= 1;
1465 match_len += 1;
1466 }
1467 MatchCandidate {
1468 start: abs_pos,
1469 offset: abs_pos - candidate_pos,
1470 match_len,
1471 }
1472}
1473
1474fn repcode_candidate_shared(
1475 concat: &[u8],
1476 history_abs_start: usize,
1477 offset_hist: [u32; 3],
1478 abs_pos: usize,
1479 lit_len: usize,
1480 min_match_len: usize,
1481) -> Option<MatchCandidate> {
1482 let reps = if lit_len == 0 {
1483 [
1484 Some(offset_hist[1] as usize),
1485 Some(offset_hist[2] as usize),
1486 (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
1487 ]
1488 } else {
1489 [
1490 Some(offset_hist[0] as usize),
1491 Some(offset_hist[1] as usize),
1492 Some(offset_hist[2] as usize),
1493 ]
1494 };
1495
1496 let current_idx = abs_pos - history_abs_start;
1497 if current_idx + min_match_len > concat.len() {
1498 return None;
1499 }
1500
1501 let mut best = None;
1502 for rep in reps.into_iter().flatten() {
1503 if rep == 0 || rep > abs_pos {
1504 continue;
1505 }
1506 let candidate_pos = abs_pos - rep;
1507 if candidate_pos < history_abs_start {
1508 continue;
1509 }
1510 let candidate_idx = candidate_pos - history_abs_start;
1511 let match_len =
1512 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1513 if match_len >= min_match_len {
1514 let candidate = extend_backwards_shared(
1515 concat,
1516 history_abs_start,
1517 candidate_pos,
1518 abs_pos,
1519 match_len,
1520 lit_len,
1521 );
1522 best = best_len_offset_candidate(best, Some(candidate));
1523 }
1524 }
1525 best
1526}
1527
1528#[derive(Copy, Clone)]
1529struct LazyMatchConfig {
1530 target_len: usize,
1531 min_match_len: usize,
1532 lazy_depth: u8,
1533 history_abs_end: usize,
1534}
1535
1536fn pick_lazy_match_shared(
1537 abs_pos: usize,
1538 lit_len: usize,
1539 best: Option<MatchCandidate>,
1540 config: LazyMatchConfig,
1541 mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1542) -> Option<MatchCandidate> {
1543 let best = best?;
1544 if best.match_len >= config.target_len
1545 || abs_pos + 1 + config.min_match_len > config.history_abs_end
1546 {
1547 return Some(best);
1548 }
1549
1550 let next = best_match_at(abs_pos + 1, lit_len + 1);
1551 if let Some(next) = next
1552 && (next.match_len > best.match_len
1553 || (next.match_len == best.match_len && next.offset < best.offset))
1554 {
1555 return None;
1556 }
1557
1558 if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1559 let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1560 if let Some(next2) = next2
1561 && next2.match_len > best.match_len + 1
1562 {
1563 return None;
1564 }
1565 }
1566
1567 Some(best)
1568}
1569
1570impl DfastMatchGenerator {
1571 fn new(max_window_size: usize) -> Self {
1572 Self {
1573 max_window_size,
1574 window: VecDeque::new(),
1575 window_size: 0,
1576 history: Vec::new(),
1577 history_start: 0,
1578 history_abs_start: 0,
1579 offset_hist: [1, 4, 8],
1580 short_hash: Vec::new(),
1581 long_hash: Vec::new(),
1582 hash_bits: DFAST_HASH_BITS,
1583 lazy_depth: 1,
1584 }
1585 }
1586
1587 fn set_hash_bits(&mut self, bits: usize) {
1588 let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1589 if self.hash_bits != clamped {
1590 self.hash_bits = clamped;
1591 self.short_hash = Vec::new();
1592 self.long_hash = Vec::new();
1593 }
1594 }
1595
1596 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1597 self.window_size = 0;
1598 self.history.clear();
1599 self.history_start = 0;
1600 self.history_abs_start = 0;
1601 self.offset_hist = [1, 4, 8];
1602 if !self.short_hash.is_empty() {
1603 self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1604 self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1605 }
1606 for mut data in self.window.drain(..) {
1607 data.resize(data.capacity(), 0);
1608 reuse_space(data);
1609 }
1610 }
1611
1612 fn get_last_space(&self) -> &[u8] {
1613 self.window.back().unwrap().as_slice()
1614 }
1615
1616 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1617 assert!(data.len() <= self.max_window_size);
1618 while self.window_size + data.len() > self.max_window_size {
1619 let removed = self.window.pop_front().unwrap();
1620 self.window_size -= removed.len();
1621 self.history_start += removed.len();
1622 self.history_abs_start += removed.len();
1623 reuse_space(removed);
1624 }
1625 self.compact_history();
1626 self.history.extend_from_slice(&data);
1627 self.window_size += data.len();
1628 self.window.push_back(data);
1629 }
1630
1631 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1632 while self.window_size > self.max_window_size {
1633 let removed = self.window.pop_front().unwrap();
1634 self.window_size -= removed.len();
1635 self.history_start += removed.len();
1636 self.history_abs_start += removed.len();
1637 reuse_space(removed);
1638 }
1639 }
1640
1641 fn skip_matching(&mut self) {
1642 self.ensure_hash_tables();
1643 let current_len = self.window.back().unwrap().len();
1644 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1645 self.insert_positions(current_abs_start, current_abs_start + current_len);
1646 }
1647
1648 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1649 self.ensure_hash_tables();
1650
1651 let current_len = self.window.back().unwrap().len();
1652 if current_len == 0 {
1653 return;
1654 }
1655
1656 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1657
1658 let mut pos = 0usize;
1659 let mut literals_start = 0usize;
1660 while pos + DFAST_MIN_MATCH_LEN <= current_len {
1661 let abs_pos = current_abs_start + pos;
1662 let lit_len = pos - literals_start;
1663
1664 let best = self.best_match(abs_pos, lit_len);
1665 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1666 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1667 let current = self.window.back().unwrap().as_slice();
1668 let start = candidate.start - current_abs_start;
1669 let literals = ¤t[literals_start..start];
1670 handle_sequence(Sequence::Triple {
1671 literals,
1672 offset: candidate.offset,
1673 match_len: candidate.match_len,
1674 });
1675 let _ = encode_offset_with_history(
1678 candidate.offset as u32,
1679 literals.len() as u32,
1680 &mut self.offset_hist,
1681 );
1682 pos = start + candidate.match_len;
1683 literals_start = pos;
1684 } else {
1685 self.insert_position(abs_pos);
1686 pos += 1;
1687 }
1688 }
1689
1690 if literals_start < current_len {
1691 let current = self.window.back().unwrap().as_slice();
1698 handle_sequence(Sequence::Literals {
1699 literals: ¤t[literals_start..],
1700 });
1701 }
1702 }
1703
1704 fn ensure_hash_tables(&mut self) {
1705 let table_len = 1usize << self.hash_bits;
1706 if self.short_hash.len() != table_len {
1707 self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1711 self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1712 }
1713 }
1714
1715 fn compact_history(&mut self) {
1716 if self.history_start == 0 {
1717 return;
1718 }
1719 if self.history_start >= self.max_window_size
1720 || self.history_start * 2 >= self.history.len()
1721 {
1722 self.history.drain(..self.history_start);
1723 self.history_start = 0;
1724 }
1725 }
1726
1727 fn live_history(&self) -> &[u8] {
1728 &self.history[self.history_start..]
1729 }
1730
1731 fn history_abs_end(&self) -> usize {
1732 self.history_abs_start + self.live_history().len()
1733 }
1734
1735 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1736 let rep = self.repcode_candidate(abs_pos, lit_len);
1737 let hash = self.hash_candidate(abs_pos, lit_len);
1738 best_len_offset_candidate(rep, hash)
1739 }
1740
1741 fn pick_lazy_match(
1742 &self,
1743 abs_pos: usize,
1744 lit_len: usize,
1745 best: Option<MatchCandidate>,
1746 ) -> Option<MatchCandidate> {
1747 pick_lazy_match_shared(
1748 abs_pos,
1749 lit_len,
1750 best,
1751 LazyMatchConfig {
1752 target_len: DFAST_TARGET_LEN,
1753 min_match_len: DFAST_MIN_MATCH_LEN,
1754 lazy_depth: self.lazy_depth,
1755 history_abs_end: self.history_abs_end(),
1756 },
1757 |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
1758 )
1759 }
1760
1761 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1762 repcode_candidate_shared(
1763 self.live_history(),
1764 self.history_abs_start,
1765 self.offset_hist,
1766 abs_pos,
1767 lit_len,
1768 DFAST_MIN_MATCH_LEN,
1769 )
1770 }
1771
1772 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1773 let concat = self.live_history();
1774 let current_idx = abs_pos - self.history_abs_start;
1775 let mut best = None;
1776 for candidate_pos in self.long_candidates(abs_pos) {
1777 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1778 continue;
1779 }
1780 let candidate_idx = candidate_pos - self.history_abs_start;
1781 let match_len =
1782 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1783 if match_len >= DFAST_MIN_MATCH_LEN {
1784 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1785 best = best_len_offset_candidate(best, Some(candidate));
1786 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1787 return best;
1788 }
1789 }
1790 }
1791
1792 for candidate_pos in self.short_candidates(abs_pos) {
1793 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1794 continue;
1795 }
1796 let candidate_idx = candidate_pos - self.history_abs_start;
1797 let match_len =
1798 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1799 if match_len >= DFAST_MIN_MATCH_LEN {
1800 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1801 best = best_len_offset_candidate(best, Some(candidate));
1802 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1803 return best;
1804 }
1805 }
1806 }
1807 best
1808 }
1809
1810 fn extend_backwards(
1811 &self,
1812 candidate_pos: usize,
1813 abs_pos: usize,
1814 match_len: usize,
1815 lit_len: usize,
1816 ) -> MatchCandidate {
1817 extend_backwards_shared(
1818 self.live_history(),
1819 self.history_abs_start,
1820 candidate_pos,
1821 abs_pos,
1822 match_len,
1823 lit_len,
1824 )
1825 }
1826
1827 fn insert_positions(&mut self, start: usize, end: usize) {
1828 for pos in start..end {
1829 self.insert_position(pos);
1830 }
1831 }
1832
1833 fn insert_position(&mut self, pos: usize) {
1834 let idx = pos - self.history_abs_start;
1835 let short = {
1836 let concat = self.live_history();
1837 (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
1838 };
1839 if let Some(short) = short {
1840 let bucket = &mut self.short_hash[short];
1841 if bucket[0] != pos {
1842 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1843 bucket[0] = pos;
1844 }
1845 }
1846
1847 let long = {
1848 let concat = self.live_history();
1849 (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
1850 };
1851 if let Some(long) = long {
1852 let bucket = &mut self.long_hash[long];
1853 if bucket[0] != pos {
1854 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1855 bucket[0] = pos;
1856 }
1857 }
1858 }
1859
1860 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1861 let concat = self.live_history();
1862 let idx = pos - self.history_abs_start;
1863 (idx + 4 <= concat.len())
1864 .then(|| self.short_hash[self.hash4(&concat[idx..])])
1865 .into_iter()
1866 .flatten()
1867 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1868 }
1869
1870 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1871 let concat = self.live_history();
1872 let idx = pos - self.history_abs_start;
1873 (idx + 8 <= concat.len())
1874 .then(|| self.long_hash[self.hash8(&concat[idx..])])
1875 .into_iter()
1876 .flatten()
1877 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1878 }
1879
1880 fn hash4(&self, data: &[u8]) -> usize {
1881 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1882 self.hash_index(value)
1883 }
1884
1885 fn hash8(&self, data: &[u8]) -> usize {
1886 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1887 self.hash_index(value)
1888 }
1889
1890 fn hash_index(&self, value: u64) -> usize {
1891 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1892 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
1893 }
1894}
1895
1896struct RowMatchGenerator {
1897 max_window_size: usize,
1898 window: VecDeque<Vec<u8>>,
1899 window_size: usize,
1900 history: Vec<u8>,
1901 history_start: usize,
1902 history_abs_start: usize,
1903 offset_hist: [u32; 3],
1904 row_hash_log: usize,
1905 row_log: usize,
1906 search_depth: usize,
1907 target_len: usize,
1908 lazy_depth: u8,
1909 row_heads: Vec<u8>,
1910 row_positions: Vec<usize>,
1911 row_tags: Vec<u8>,
1912}
1913
1914impl RowMatchGenerator {
1915 fn new(max_window_size: usize) -> Self {
1916 Self {
1917 max_window_size,
1918 window: VecDeque::new(),
1919 window_size: 0,
1920 history: Vec::new(),
1921 history_start: 0,
1922 history_abs_start: 0,
1923 offset_hist: [1, 4, 8],
1924 row_hash_log: ROW_HASH_BITS - ROW_LOG,
1925 row_log: ROW_LOG,
1926 search_depth: ROW_SEARCH_DEPTH,
1927 target_len: ROW_TARGET_LEN,
1928 lazy_depth: 1,
1929 row_heads: Vec::new(),
1930 row_positions: Vec::new(),
1931 row_tags: Vec::new(),
1932 }
1933 }
1934
1935 fn set_hash_bits(&mut self, bits: usize) {
1936 let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
1937 let row_hash_log = clamped.saturating_sub(self.row_log);
1938 if self.row_hash_log != row_hash_log {
1939 self.row_hash_log = row_hash_log;
1940 self.row_heads.clear();
1941 self.row_positions.clear();
1942 self.row_tags.clear();
1943 }
1944 }
1945
1946 fn configure(&mut self, config: RowConfig) {
1947 self.row_log = config.row_log.clamp(4, 6);
1948 self.search_depth = config.search_depth;
1949 self.target_len = config.target_len;
1950 self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
1951 }
1952
1953 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1954 self.window_size = 0;
1955 self.history.clear();
1956 self.history_start = 0;
1957 self.history_abs_start = 0;
1958 self.offset_hist = [1, 4, 8];
1959 self.row_heads.fill(0);
1960 self.row_positions.fill(ROW_EMPTY_SLOT);
1961 self.row_tags.fill(0);
1962 for mut data in self.window.drain(..) {
1963 data.resize(data.capacity(), 0);
1964 reuse_space(data);
1965 }
1966 }
1967
1968 fn get_last_space(&self) -> &[u8] {
1969 self.window.back().unwrap().as_slice()
1970 }
1971
1972 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1973 assert!(data.len() <= self.max_window_size);
1974 while self.window_size + data.len() > self.max_window_size {
1975 let removed = self.window.pop_front().unwrap();
1976 self.window_size -= removed.len();
1977 self.history_start += removed.len();
1978 self.history_abs_start += removed.len();
1979 reuse_space(removed);
1980 }
1981 self.compact_history();
1982 self.history.extend_from_slice(&data);
1983 self.window_size += data.len();
1984 self.window.push_back(data);
1985 }
1986
1987 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1988 while self.window_size > self.max_window_size {
1989 let removed = self.window.pop_front().unwrap();
1990 self.window_size -= removed.len();
1991 self.history_start += removed.len();
1992 self.history_abs_start += removed.len();
1993 reuse_space(removed);
1994 }
1995 }
1996
1997 fn skip_matching(&mut self) {
1998 self.ensure_tables();
1999 let current_len = self.window.back().unwrap().len();
2000 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2001 let backfill_start = self.backfill_start(current_abs_start);
2002 if backfill_start < current_abs_start {
2003 self.insert_positions(backfill_start, current_abs_start);
2004 }
2005 self.insert_positions(current_abs_start, current_abs_start + current_len);
2006 }
2007
2008 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2009 self.ensure_tables();
2010
2011 let current_len = self.window.back().unwrap().len();
2012 if current_len == 0 {
2013 return;
2014 }
2015 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2016 let backfill_start = self.backfill_start(current_abs_start);
2017 if backfill_start < current_abs_start {
2018 self.insert_positions(backfill_start, current_abs_start);
2019 }
2020
2021 let mut pos = 0usize;
2022 let mut literals_start = 0usize;
2023 while pos + ROW_MIN_MATCH_LEN <= current_len {
2024 let abs_pos = current_abs_start + pos;
2025 let lit_len = pos - literals_start;
2026
2027 let best = self.best_match(abs_pos, lit_len);
2028 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
2029 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
2030 let current = self.window.back().unwrap().as_slice();
2031 let start = candidate.start - current_abs_start;
2032 let literals = ¤t[literals_start..start];
2033 handle_sequence(Sequence::Triple {
2034 literals,
2035 offset: candidate.offset,
2036 match_len: candidate.match_len,
2037 });
2038 let _ = encode_offset_with_history(
2039 candidate.offset as u32,
2040 literals.len() as u32,
2041 &mut self.offset_hist,
2042 );
2043 pos = start + candidate.match_len;
2044 literals_start = pos;
2045 } else {
2046 self.insert_position(abs_pos);
2047 pos += 1;
2048 }
2049 }
2050
2051 while pos + ROW_HASH_KEY_LEN <= current_len {
2052 self.insert_position(current_abs_start + pos);
2053 pos += 1;
2054 }
2055
2056 if literals_start < current_len {
2057 let current = self.window.back().unwrap().as_slice();
2058 handle_sequence(Sequence::Literals {
2059 literals: ¤t[literals_start..],
2060 });
2061 }
2062 }
2063
2064 fn ensure_tables(&mut self) {
2065 let row_count = 1usize << self.row_hash_log;
2066 let row_entries = 1usize << self.row_log;
2067 let total = row_count * row_entries;
2068 if self.row_positions.len() != total {
2069 self.row_heads = alloc::vec![0; row_count];
2070 self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
2071 self.row_tags = alloc::vec![0; total];
2072 }
2073 }
2074
2075 fn compact_history(&mut self) {
2076 if self.history_start == 0 {
2077 return;
2078 }
2079 if self.history_start >= self.max_window_size
2080 || self.history_start * 2 >= self.history.len()
2081 {
2082 self.history.drain(..self.history_start);
2083 self.history_start = 0;
2084 }
2085 }
2086
2087 fn live_history(&self) -> &[u8] {
2088 &self.history[self.history_start..]
2089 }
2090
2091 fn history_abs_end(&self) -> usize {
2092 self.history_abs_start + self.live_history().len()
2093 }
2094
2095 fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
2096 let idx = abs_pos - self.history_abs_start;
2097 let concat = self.live_history();
2098 if idx + ROW_HASH_KEY_LEN > concat.len() {
2099 return None;
2100 }
2101 let value =
2102 u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
2103 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2104 let hash = value.wrapping_mul(PRIME);
2105 let total_bits = self.row_hash_log + ROW_TAG_BITS;
2106 let combined = hash >> (u64::BITS as usize - total_bits);
2107 let row_mask = (1usize << self.row_hash_log) - 1;
2108 let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
2109 let tag = combined as u8;
2110 Some((row, tag))
2111 }
2112
2113 fn backfill_start(&self, current_abs_start: usize) -> usize {
2114 current_abs_start
2115 .saturating_sub(ROW_HASH_KEY_LEN - 1)
2116 .max(self.history_abs_start)
2117 }
2118
2119 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2120 let rep = self.repcode_candidate(abs_pos, lit_len);
2121 let row = self.row_candidate(abs_pos, lit_len);
2122 best_len_offset_candidate(rep, row)
2123 }
2124
2125 fn pick_lazy_match(
2126 &self,
2127 abs_pos: usize,
2128 lit_len: usize,
2129 best: Option<MatchCandidate>,
2130 ) -> Option<MatchCandidate> {
2131 pick_lazy_match_shared(
2132 abs_pos,
2133 lit_len,
2134 best,
2135 LazyMatchConfig {
2136 target_len: self.target_len,
2137 min_match_len: ROW_MIN_MATCH_LEN,
2138 lazy_depth: self.lazy_depth,
2139 history_abs_end: self.history_abs_end(),
2140 },
2141 |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2142 )
2143 }
2144
2145 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2146 repcode_candidate_shared(
2147 self.live_history(),
2148 self.history_abs_start,
2149 self.offset_hist,
2150 abs_pos,
2151 lit_len,
2152 ROW_MIN_MATCH_LEN,
2153 )
2154 }
2155
2156 fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2157 let concat = self.live_history();
2158 let current_idx = abs_pos - self.history_abs_start;
2159 if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
2160 return None;
2161 }
2162
2163 let (row, tag) = self.hash_and_row(abs_pos)?;
2164 let row_entries = 1usize << self.row_log;
2165 let row_mask = row_entries - 1;
2166 let row_base = row << self.row_log;
2167 let head = self.row_heads[row] as usize;
2168 let max_walk = self.search_depth.min(row_entries);
2169
2170 let mut best = None;
2171 for i in 0..max_walk {
2172 let slot = (head + i) & row_mask;
2173 let idx = row_base + slot;
2174 if self.row_tags[idx] != tag {
2175 continue;
2176 }
2177 let candidate_pos = self.row_positions[idx];
2178 if candidate_pos == ROW_EMPTY_SLOT
2179 || candidate_pos < self.history_abs_start
2180 || candidate_pos >= abs_pos
2181 {
2182 continue;
2183 }
2184 let candidate_idx = candidate_pos - self.history_abs_start;
2185 let match_len =
2186 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2187 if match_len >= ROW_MIN_MATCH_LEN {
2188 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2189 best = best_len_offset_candidate(best, Some(candidate));
2190 if best.is_some_and(|best| best.match_len >= self.target_len) {
2191 return best;
2192 }
2193 }
2194 }
2195 best
2196 }
2197
2198 fn extend_backwards(
2199 &self,
2200 candidate_pos: usize,
2201 abs_pos: usize,
2202 match_len: usize,
2203 lit_len: usize,
2204 ) -> MatchCandidate {
2205 extend_backwards_shared(
2206 self.live_history(),
2207 self.history_abs_start,
2208 candidate_pos,
2209 abs_pos,
2210 match_len,
2211 lit_len,
2212 )
2213 }
2214
2215 fn insert_positions(&mut self, start: usize, end: usize) {
2216 for pos in start..end {
2217 self.insert_position(pos);
2218 }
2219 }
2220
2221 fn insert_position(&mut self, abs_pos: usize) {
2222 let Some((row, tag)) = self.hash_and_row(abs_pos) else {
2223 return;
2224 };
2225 let row_entries = 1usize << self.row_log;
2226 let row_mask = row_entries - 1;
2227 let row_base = row << self.row_log;
2228 let head = self.row_heads[row] as usize;
2229 let next = head.wrapping_sub(1) & row_mask;
2230 self.row_heads[row] = next as u8;
2231 self.row_tags[row_base + next] = tag;
2232 self.row_positions[row_base + next] = abs_pos;
2233 }
2234}
2235
2236struct HcMatchGenerator {
2237 max_window_size: usize,
2238 window: VecDeque<Vec<u8>>,
2239 window_size: usize,
2240 history: Vec<u8>,
2241 history_start: usize,
2242 history_abs_start: usize,
2243 position_base: usize,
2244 offset_hist: [u32; 3],
2245 hash_table: Vec<u32>,
2246 chain_table: Vec<u32>,
2247 lazy_depth: u8,
2248 hash_log: usize,
2249 chain_log: usize,
2250 search_depth: usize,
2251 target_len: usize,
2252}
2253
2254impl HcMatchGenerator {
2255 fn new(max_window_size: usize) -> Self {
2256 Self {
2257 max_window_size,
2258 window: VecDeque::new(),
2259 window_size: 0,
2260 history: Vec::new(),
2261 history_start: 0,
2262 history_abs_start: 0,
2263 position_base: 0,
2264 offset_hist: [1, 4, 8],
2265 hash_table: Vec::new(),
2266 chain_table: Vec::new(),
2267 lazy_depth: 2,
2268 hash_log: HC_HASH_LOG,
2269 chain_log: HC_CHAIN_LOG,
2270 search_depth: HC_SEARCH_DEPTH,
2271 target_len: HC_TARGET_LEN,
2272 }
2273 }
2274
2275 fn configure(&mut self, config: HcConfig) {
2276 let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
2277 self.hash_log = config.hash_log;
2278 self.chain_log = config.chain_log;
2279 self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
2280 self.target_len = config.target_len;
2281 if resize && !self.hash_table.is_empty() {
2282 self.hash_table.clear();
2284 self.chain_table.clear();
2285 }
2286 }
2287
2288 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2289 self.window_size = 0;
2290 self.history.clear();
2291 self.history_start = 0;
2292 self.history_abs_start = 0;
2293 self.position_base = 0;
2294 self.offset_hist = [1, 4, 8];
2295 if !self.hash_table.is_empty() {
2296 self.hash_table.fill(HC_EMPTY);
2297 self.chain_table.fill(HC_EMPTY);
2298 }
2299 for mut data in self.window.drain(..) {
2300 data.resize(data.capacity(), 0);
2301 reuse_space(data);
2302 }
2303 }
2304
2305 fn get_last_space(&self) -> &[u8] {
2306 self.window.back().unwrap().as_slice()
2307 }
2308
2309 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2313 assert!(data.len() <= self.max_window_size);
2314 while self.window_size + data.len() > self.max_window_size {
2315 let removed = self.window.pop_front().unwrap();
2316 self.window_size -= removed.len();
2317 self.history_start += removed.len();
2318 self.history_abs_start += removed.len();
2319 reuse_space(removed);
2320 }
2321 self.compact_history();
2322 self.history.extend_from_slice(&data);
2323 self.window_size += data.len();
2324 self.window.push_back(data);
2325 }
2326
2327 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2328 while self.window_size > self.max_window_size {
2329 let removed = self.window.pop_front().unwrap();
2330 self.window_size -= removed.len();
2331 self.history_start += removed.len();
2332 self.history_abs_start += removed.len();
2333 reuse_space(removed);
2334 }
2335 }
2336
2337 fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2340 let backfill_start = current_abs_start
2341 .saturating_sub(3)
2342 .max(self.history_abs_start);
2343 if backfill_start < current_abs_start {
2344 self.insert_positions(backfill_start, current_abs_start);
2345 }
2346 }
2347
2348 fn skip_matching(&mut self) {
2349 self.ensure_tables();
2350 let current_len = self.window.back().unwrap().len();
2351 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2352 self.backfill_boundary_positions(current_abs_start);
2353 self.insert_positions(current_abs_start, current_abs_start + current_len);
2354 }
2355
2356 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2357 self.ensure_tables();
2358
2359 let current_len = self.window.back().unwrap().len();
2360 if current_len == 0 {
2361 return;
2362 }
2363
2364 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2365 self.backfill_boundary_positions(current_abs_start);
2366
2367 let mut pos = 0usize;
2368 let mut literals_start = 0usize;
2369 while pos + HC_MIN_MATCH_LEN <= current_len {
2370 let abs_pos = current_abs_start + pos;
2371 let lit_len = pos - literals_start;
2372
2373 let best = self.find_best_match(abs_pos, lit_len);
2374 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
2375 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
2376 let current = self.window.back().unwrap().as_slice();
2377 let start = candidate.start - current_abs_start;
2378 let literals = ¤t[literals_start..start];
2379 handle_sequence(Sequence::Triple {
2380 literals,
2381 offset: candidate.offset,
2382 match_len: candidate.match_len,
2383 });
2384 let _ = encode_offset_with_history(
2385 candidate.offset as u32,
2386 literals.len() as u32,
2387 &mut self.offset_hist,
2388 );
2389 pos = start + candidate.match_len;
2390 literals_start = pos;
2391 } else {
2392 self.insert_position(abs_pos);
2393 pos += 1;
2394 }
2395 }
2396
2397 while pos + 4 <= current_len {
2400 self.insert_position(current_abs_start + pos);
2401 pos += 1;
2402 }
2403
2404 if literals_start < current_len {
2405 let current = self.window.back().unwrap().as_slice();
2406 handle_sequence(Sequence::Literals {
2407 literals: ¤t[literals_start..],
2408 });
2409 }
2410 }
2411
2412 fn ensure_tables(&mut self) {
2413 if self.hash_table.is_empty() {
2414 self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2415 self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2416 }
2417 }
2418
2419 fn compact_history(&mut self) {
2420 if self.history_start == 0 {
2421 return;
2422 }
2423 if self.history_start >= self.max_window_size
2424 || self.history_start * 2 >= self.history.len()
2425 {
2426 self.history.drain(..self.history_start);
2427 self.history_start = 0;
2428 }
2429 }
2430
2431 fn live_history(&self) -> &[u8] {
2432 &self.history[self.history_start..]
2433 }
2434
2435 fn history_abs_end(&self) -> usize {
2436 self.history_abs_start + self.live_history().len()
2437 }
2438
2439 fn hash_position(&self, data: &[u8]) -> usize {
2440 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2441 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2442 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2443 }
2444
2445 fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2446 let rel = abs_pos.checked_sub(self.position_base)?;
2447 let rel_u32 = u32::try_from(rel).ok()?;
2448 (rel_u32 < u32::MAX).then_some(rel_u32)
2452 }
2453
2454 fn maybe_rebase_positions(&mut self, abs_pos: usize) {
2455 let needs_rebase = self
2456 .relative_position(abs_pos)
2457 .is_none_or(|relative| relative >= u32::MAX - 1);
2458 if !needs_rebase {
2459 return;
2460 }
2461
2462 self.position_base = self.history_abs_start;
2464 self.hash_table.fill(HC_EMPTY);
2465 self.chain_table.fill(HC_EMPTY);
2466
2467 let history_start = self.history_abs_start;
2468 for pos in history_start..abs_pos {
2471 self.insert_position_no_rebase(pos);
2472 }
2473 }
2474
2475 fn insert_position(&mut self, abs_pos: usize) {
2476 self.maybe_rebase_positions(abs_pos);
2477 self.insert_position_no_rebase(abs_pos);
2478 }
2479
2480 fn insert_position_no_rebase(&mut self, abs_pos: usize) {
2481 let idx = abs_pos - self.history_abs_start;
2482 let concat = self.live_history();
2483 if idx + 4 > concat.len() {
2484 return;
2485 }
2486 let hash = self.hash_position(&concat[idx..]);
2487 let Some(relative_pos) = self.relative_position(abs_pos) else {
2488 return;
2489 };
2490 let stored = relative_pos + 1;
2491 let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
2492 let prev = self.hash_table[hash];
2493 self.chain_table[chain_idx] = prev;
2494 self.hash_table[hash] = stored;
2495 }
2496
2497 fn insert_positions(&mut self, start: usize, end: usize) {
2498 for pos in start..end {
2499 self.insert_position(pos);
2500 }
2501 }
2502
2503 fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
2506 let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
2507 let idx = abs_pos - self.history_abs_start;
2508 let concat = self.live_history();
2509 if idx + 4 > concat.len() {
2510 return buf;
2511 }
2512 let hash = self.hash_position(&concat[idx..]);
2513 let chain_mask = (1 << self.chain_log) - 1;
2514
2515 let mut cur = self.hash_table[hash];
2516 let mut filled = 0;
2517 let mut steps = 0;
2525 let max_chain_steps = self.search_depth * 4;
2526 while filled < self.search_depth && steps < max_chain_steps {
2527 if cur == HC_EMPTY {
2528 break;
2529 }
2530 let candidate_rel = cur.wrapping_sub(1) as usize;
2531 let candidate_abs = self.position_base + candidate_rel;
2532 let next = self.chain_table[candidate_rel & chain_mask];
2533 steps += 1;
2534 if next == cur {
2535 if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
2538 buf[filled] = candidate_abs;
2539 }
2540 break;
2541 }
2542 cur = next;
2543 if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
2544 continue;
2545 }
2546 buf[filled] = candidate_abs;
2547 filled += 1;
2548 }
2549 buf
2550 }
2551
2552 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2553 let rep = self.repcode_candidate(abs_pos, lit_len);
2554 let hash = self.hash_chain_candidate(abs_pos, lit_len);
2555 Self::better_candidate(rep, hash)
2556 }
2557
2558 fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2559 let concat = self.live_history();
2560 let current_idx = abs_pos - self.history_abs_start;
2561 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2562 return None;
2563 }
2564
2565 let mut best: Option<MatchCandidate> = None;
2566 for candidate_abs in self.chain_candidates(abs_pos) {
2567 if candidate_abs == usize::MAX {
2568 break;
2569 }
2570 let candidate_idx = candidate_abs - self.history_abs_start;
2571 let match_len =
2572 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2573 if match_len >= HC_MIN_MATCH_LEN {
2574 let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
2575 best = Self::better_candidate(best, Some(candidate));
2576 if best.is_some_and(|b| b.match_len >= self.target_len) {
2577 return best;
2578 }
2579 }
2580 }
2581 best
2582 }
2583
2584 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2585 let reps = if lit_len == 0 {
2586 [
2587 Some(self.offset_hist[1] as usize),
2588 Some(self.offset_hist[2] as usize),
2589 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
2590 ]
2591 } else {
2592 [
2593 Some(self.offset_hist[0] as usize),
2594 Some(self.offset_hist[1] as usize),
2595 Some(self.offset_hist[2] as usize),
2596 ]
2597 };
2598
2599 let concat = self.live_history();
2600 let current_idx = abs_pos - self.history_abs_start;
2601 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2602 return None;
2603 }
2604
2605 let mut best = None;
2606 for rep in reps.into_iter().flatten() {
2607 if rep == 0 || rep > abs_pos {
2608 continue;
2609 }
2610 let candidate_pos = abs_pos - rep;
2611 if candidate_pos < self.history_abs_start {
2612 continue;
2613 }
2614 let candidate_idx = candidate_pos - self.history_abs_start;
2615 let match_len =
2616 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2617 if match_len >= HC_MIN_MATCH_LEN {
2618 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2619 best = Self::better_candidate(best, Some(candidate));
2620 }
2621 }
2622 best
2623 }
2624
2625 fn extend_backwards(
2626 &self,
2627 mut candidate_pos: usize,
2628 mut abs_pos: usize,
2629 mut match_len: usize,
2630 lit_len: usize,
2631 ) -> MatchCandidate {
2632 let concat = self.live_history();
2633 let min_abs_pos = abs_pos - lit_len;
2634 while abs_pos > min_abs_pos
2635 && candidate_pos > self.history_abs_start
2636 && concat[candidate_pos - self.history_abs_start - 1]
2637 == concat[abs_pos - self.history_abs_start - 1]
2638 {
2639 candidate_pos -= 1;
2640 abs_pos -= 1;
2641 match_len += 1;
2642 }
2643 MatchCandidate {
2644 start: abs_pos,
2645 offset: abs_pos - candidate_pos,
2646 match_len,
2647 }
2648 }
2649
2650 fn better_candidate(
2651 lhs: Option<MatchCandidate>,
2652 rhs: Option<MatchCandidate>,
2653 ) -> Option<MatchCandidate> {
2654 match (lhs, rhs) {
2655 (None, other) | (other, None) => other,
2656 (Some(lhs), Some(rhs)) => {
2657 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
2658 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
2659 if rhs_gain > lhs_gain {
2660 Some(rhs)
2661 } else {
2662 Some(lhs)
2663 }
2664 }
2665 }
2666 }
2667
2668 fn match_gain(match_len: usize, offset: usize) -> i32 {
2669 debug_assert!(
2670 offset > 0,
2671 "zstd offsets are 1-indexed, offset=0 is invalid"
2672 );
2673 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
2674 (match_len as i32) * 4 - offset_bits
2675 }
2676
2677 fn pick_lazy_match(
2681 &self,
2682 abs_pos: usize,
2683 lit_len: usize,
2684 best: Option<MatchCandidate>,
2685 ) -> Option<MatchCandidate> {
2686 let best = best?;
2687 if best.match_len >= self.target_len
2688 || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
2689 {
2690 return Some(best);
2691 }
2692
2693 let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
2694
2695 let next = self.find_best_match(abs_pos + 1, lit_len + 1);
2697 if let Some(next) = next {
2698 let next_gain = Self::match_gain(next.match_len, next.offset);
2699 if next_gain > current_gain {
2700 return None;
2701 }
2702 }
2703
2704 if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
2706 let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
2707 if let Some(next2) = next2 {
2708 let next2_gain = Self::match_gain(next2.match_len, next2.offset);
2709 if next2_gain > current_gain + 4 {
2711 return None;
2712 }
2713 }
2714 }
2715
2716 Some(best)
2717 }
2718}
2719
2720#[test]
2721fn matches() {
2722 let mut matcher = MatchGenerator::new(1000);
2723 let mut original_data = Vec::new();
2724 let mut reconstructed = Vec::new();
2725
2726 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
2727 Sequence::Literals { literals } => {
2728 assert!(!literals.is_empty());
2729 reconstructed.extend_from_slice(literals);
2730 }
2731 Sequence::Triple {
2732 literals,
2733 offset,
2734 match_len,
2735 } => {
2736 assert!(offset > 0);
2737 assert!(match_len >= MIN_MATCH_LEN);
2738 reconstructed.extend_from_slice(literals);
2739 assert!(offset <= reconstructed.len());
2740 let start = reconstructed.len() - offset;
2741 for i in 0..match_len {
2742 let byte = reconstructed[start + i];
2743 reconstructed.push(byte);
2744 }
2745 }
2746 };
2747
2748 matcher.add_data(
2749 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2750 SuffixStore::with_capacity(100),
2751 |_, _| {},
2752 );
2753 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
2754
2755 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2756
2757 assert!(!matcher.next_sequence(|_| {}));
2758
2759 matcher.add_data(
2760 alloc::vec![
2761 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
2762 ],
2763 SuffixStore::with_capacity(100),
2764 |_, _| {},
2765 );
2766 original_data.extend_from_slice(&[
2767 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
2768 ]);
2769
2770 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2771 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2772 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2773 assert!(!matcher.next_sequence(|_| {}));
2774
2775 matcher.add_data(
2776 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
2777 SuffixStore::with_capacity(100),
2778 |_, _| {},
2779 );
2780 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
2781
2782 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2783 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2784 assert!(!matcher.next_sequence(|_| {}));
2785
2786 matcher.add_data(
2787 alloc::vec![0, 0, 0, 0, 0],
2788 SuffixStore::with_capacity(100),
2789 |_, _| {},
2790 );
2791 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
2792
2793 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2794 assert!(!matcher.next_sequence(|_| {}));
2795
2796 matcher.add_data(
2797 alloc::vec![7, 8, 9, 10, 11],
2798 SuffixStore::with_capacity(100),
2799 |_, _| {},
2800 );
2801 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
2802
2803 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2804 assert!(!matcher.next_sequence(|_| {}));
2805
2806 matcher.add_data(
2807 alloc::vec![1, 3, 5, 7, 9],
2808 SuffixStore::with_capacity(100),
2809 |_, _| {},
2810 );
2811 matcher.skip_matching();
2812 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
2813 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
2814 assert!(!matcher.next_sequence(|_| {}));
2815
2816 matcher.add_data(
2817 alloc::vec![1, 3, 5, 7, 9],
2818 SuffixStore::with_capacity(100),
2819 |_, _| {},
2820 );
2821 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
2822
2823 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2824 assert!(!matcher.next_sequence(|_| {}));
2825
2826 matcher.add_data(
2827 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
2828 SuffixStore::with_capacity(100),
2829 |_, _| {},
2830 );
2831 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
2832
2833 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2834 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2835 assert!(!matcher.next_sequence(|_| {}));
2836
2837 assert_eq!(reconstructed, original_data);
2838}
2839
2840#[test]
2841fn dfast_matches_roundtrip_multi_block_pattern() {
2842 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
2843 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2844 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2845
2846 let mut matcher = DfastMatchGenerator::new(1 << 22);
2847 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2848 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2849 Sequence::Triple {
2850 literals,
2851 offset,
2852 match_len,
2853 } => {
2854 decoded.extend_from_slice(literals);
2855 let start = decoded.len() - offset;
2856 for i in 0..match_len {
2857 let byte = decoded[start + i];
2858 decoded.push(byte);
2859 }
2860 }
2861 };
2862
2863 matcher.add_data(first_block.clone(), |_| {});
2864 let mut history = Vec::new();
2865 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2866 assert_eq!(history, first_block);
2867
2868 matcher.add_data(second_block.clone(), |_| {});
2869 let prefix_len = history.len();
2870 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2871
2872 assert_eq!(&history[prefix_len..], second_block.as_slice());
2873}
2874
2875#[test]
2876fn driver_switches_backends_and_initializes_dfast_via_reset() {
2877 let mut driver = MatchGeneratorDriver::new(32, 2);
2878
2879 driver.reset(CompressionLevel::Default);
2880 assert_eq!(driver.active_backend, MatcherBackend::Dfast);
2881 assert_eq!(driver.window_size(), (1u64 << 22));
2882
2883 let mut first = driver.get_next_space();
2884 first[..12].copy_from_slice(b"abcabcabcabc");
2885 first.truncate(12);
2886 driver.commit_space(first);
2887 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
2888 driver.skip_matching();
2889
2890 let mut second = driver.get_next_space();
2891 second[..12].copy_from_slice(b"abcabcabcabc");
2892 second.truncate(12);
2893 driver.commit_space(second);
2894
2895 let mut reconstructed = b"abcabcabcabc".to_vec();
2896 driver.start_matching(|seq| match seq {
2897 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2898 Sequence::Triple {
2899 literals,
2900 offset,
2901 match_len,
2902 } => {
2903 reconstructed.extend_from_slice(literals);
2904 let start = reconstructed.len() - offset;
2905 for i in 0..match_len {
2906 let byte = reconstructed[start + i];
2907 reconstructed.push(byte);
2908 }
2909 }
2910 });
2911 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
2912
2913 driver.reset(CompressionLevel::Fastest);
2914 assert_eq!(driver.window_size(), (1u64 << 17));
2915}
2916
2917#[test]
2918fn driver_level4_selects_row_backend() {
2919 let mut driver = MatchGeneratorDriver::new(32, 2);
2920 driver.reset(CompressionLevel::Level(4));
2921 assert_eq!(driver.active_backend, MatcherBackend::Row);
2922}
2923
2924#[test]
2925fn driver_small_source_hint_shrinks_dfast_hash_tables() {
2926 let mut driver = MatchGeneratorDriver::new(32, 2);
2927
2928 driver.reset(CompressionLevel::Level(2));
2929 let mut space = driver.get_next_space();
2930 space[..12].copy_from_slice(b"abcabcabcabc");
2931 space.truncate(12);
2932 driver.commit_space(space);
2933 driver.skip_matching();
2934 let full_tables = driver.dfast_matcher().short_hash.len();
2935 assert_eq!(full_tables, 1 << DFAST_HASH_BITS);
2936
2937 driver.set_source_size_hint(1024);
2938 driver.reset(CompressionLevel::Level(2));
2939 let mut space = driver.get_next_space();
2940 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2941 space.truncate(12);
2942 driver.commit_space(space);
2943 driver.skip_matching();
2944 let hinted_tables = driver.dfast_matcher().short_hash.len();
2945
2946 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
2947 assert_eq!(hinted_tables, 1 << MIN_HINTED_WINDOW_LOG);
2948 assert!(
2949 hinted_tables < full_tables,
2950 "tiny source hint should reduce dfast table footprint"
2951 );
2952}
2953
2954#[test]
2955fn driver_small_source_hint_shrinks_row_hash_tables() {
2956 let mut driver = MatchGeneratorDriver::new(32, 2);
2957
2958 driver.reset(CompressionLevel::Level(4));
2959 let mut space = driver.get_next_space();
2960 space[..12].copy_from_slice(b"abcabcabcabc");
2961 space.truncate(12);
2962 driver.commit_space(space);
2963 driver.skip_matching();
2964 let full_rows = driver.row_matcher().row_heads.len();
2965 assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));
2966
2967 driver.set_source_size_hint(1024);
2968 driver.reset(CompressionLevel::Level(4));
2969 let mut space = driver.get_next_space();
2970 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2971 space.truncate(12);
2972 driver.commit_space(space);
2973 driver.skip_matching();
2974 let hinted_rows = driver.row_matcher().row_heads.len();
2975
2976 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
2977 assert_eq!(
2978 hinted_rows,
2979 1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
2980 );
2981 assert!(
2982 hinted_rows < full_rows,
2983 "tiny source hint should reduce row hash table footprint"
2984 );
2985}
2986
2987#[test]
2988fn row_matches_roundtrip_multi_block_pattern() {
2989 let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
2990 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2991 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2992
2993 let mut matcher = RowMatchGenerator::new(1 << 22);
2994 matcher.configure(ROW_CONFIG);
2995 matcher.ensure_tables();
2996 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2997 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2998 Sequence::Triple {
2999 literals,
3000 offset,
3001 match_len,
3002 } => {
3003 decoded.extend_from_slice(literals);
3004 let start = decoded.len() - offset;
3005 for i in 0..match_len {
3006 let byte = decoded[start + i];
3007 decoded.push(byte);
3008 }
3009 }
3010 };
3011
3012 matcher.add_data(first_block.clone(), |_| {});
3013 let mut history = Vec::new();
3014 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3015 assert_eq!(history, first_block);
3016
3017 matcher.add_data(second_block.clone(), |_| {});
3018 let prefix_len = history.len();
3019 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3020
3021 assert_eq!(&history[prefix_len..], second_block.as_slice());
3022
3023 let third_block: Vec<u8> = (0u8..=255).collect();
3025 matcher.add_data(third_block.clone(), |_| {});
3026 let third_prefix = history.len();
3027 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3028 assert_eq!(&history[third_prefix..], third_block.as_slice());
3029}
3030
3031#[test]
3032fn row_short_block_emits_literals_only() {
3033 let mut matcher = RowMatchGenerator::new(1 << 22);
3034 matcher.configure(ROW_CONFIG);
3035
3036 matcher.add_data(b"abcde".to_vec(), |_| {});
3037
3038 let mut saw_triple = false;
3039 let mut reconstructed = Vec::new();
3040 matcher.start_matching(|seq| match seq {
3041 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
3042 Sequence::Triple { .. } => saw_triple = true,
3043 });
3044
3045 assert!(
3046 !saw_triple,
3047 "row backend must not emit triples for short blocks"
3048 );
3049 assert_eq!(reconstructed, b"abcde");
3050
3051 saw_triple = false;
3053 matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
3054 matcher.start_matching(|seq| {
3055 if let Sequence::Triple { .. } = seq {
3056 saw_triple = true;
3057 }
3058 });
3059 assert!(
3060 saw_triple,
3061 "row backend should emit triples on repeated data"
3062 );
3063}
3064
3065#[test]
3066fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
3067 let mut matcher = RowMatchGenerator::new(1 << 22);
3068 matcher.configure(ROW_CONFIG);
3069 matcher.add_data(b"abcabc".to_vec(), |_| {});
3070
3071 let best = MatchCandidate {
3072 start: 0,
3073 offset: 1,
3074 match_len: ROW_MIN_MATCH_LEN,
3075 };
3076 let picked = matcher
3077 .pick_lazy_match(0, 0, Some(best))
3078 .expect("best candidate must survive");
3079
3080 assert_eq!(picked.start, best.start);
3081 assert_eq!(picked.offset, best.offset);
3082 assert_eq!(picked.match_len, best.match_len);
3083}
3084
3085#[test]
3086fn row_backfills_previous_block_tail_for_cross_boundary_match() {
3087 let mut matcher = RowMatchGenerator::new(1 << 22);
3088 matcher.configure(ROW_CONFIG);
3089
3090 let mut first_block = alloc::vec![0xA5; 64];
3091 first_block.extend_from_slice(b"XYZ");
3092 let second_block = b"XYZXYZtail".to_vec();
3093
3094 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
3095 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
3096 Sequence::Triple {
3097 literals,
3098 offset,
3099 match_len,
3100 } => {
3101 decoded.extend_from_slice(literals);
3102 let start = decoded.len() - offset;
3103 for i in 0..match_len {
3104 let byte = decoded[start + i];
3105 decoded.push(byte);
3106 }
3107 }
3108 };
3109
3110 matcher.add_data(first_block.clone(), |_| {});
3111 let mut reconstructed = Vec::new();
3112 matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
3113 assert_eq!(reconstructed, first_block);
3114
3115 matcher.add_data(second_block.clone(), |_| {});
3116 let mut saw_cross_boundary = false;
3117 let prefix_len = reconstructed.len();
3118 matcher.start_matching(|seq| {
3119 if let Sequence::Triple {
3120 literals,
3121 offset,
3122 match_len,
3123 } = seq
3124 && literals.is_empty()
3125 && offset == 3
3126 && match_len >= ROW_MIN_MATCH_LEN
3127 {
3128 saw_cross_boundary = true;
3129 }
3130 replay_sequence(&mut reconstructed, seq);
3131 });
3132
3133 assert!(
3134 saw_cross_boundary,
3135 "row matcher should reuse the 3-byte previous-block tail"
3136 );
3137 assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
3138}
3139
3140#[test]
3141fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
3142 let mut driver = MatchGeneratorDriver::new(32, 2);
3143
3144 driver.reset(CompressionLevel::Level(2));
3145 let mut space = driver.get_next_space();
3146 space[..12].copy_from_slice(b"abcabcabcabc");
3147 space.truncate(12);
3148 driver.commit_space(space);
3149 driver.skip_matching();
3150
3151 let table_len = driver.dfast_matcher().short_hash.len();
3152 assert_eq!(
3153 table_len,
3154 1 << DFAST_HASH_BITS,
3155 "unhinted Level(2) should keep default dfast table size"
3156 );
3157}
3158
3159#[test]
3160fn simple_backend_rejects_undersized_pooled_suffix_store() {
3161 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
3162 driver.reset(CompressionLevel::Fastest);
3163
3164 driver.suffix_pool.push(SuffixStore::with_capacity(1024));
3165
3166 let mut space = driver.get_next_space();
3167 space.clear();
3168 space.resize(4096, 0xAB);
3169 driver.commit_space(space);
3170
3171 let last_suffix_slots = driver
3172 .match_generator
3173 .window
3174 .last()
3175 .expect("window entry must exist after commit")
3176 .suffixes
3177 .slots
3178 .len();
3179 assert!(
3180 last_suffix_slots >= 4096,
3181 "undersized pooled suffix store must not be reused for larger blocks"
3182 );
3183}
3184
3185#[test]
3186fn source_hint_clamps_driver_slice_size_to_window() {
3187 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
3188 driver.set_source_size_hint(1024);
3189 driver.reset(CompressionLevel::Default);
3190
3191 let window = driver.window_size() as usize;
3192 assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
3193 assert_eq!(driver.slice_size, window);
3194
3195 let space = driver.get_next_space();
3196 assert_eq!(space.len(), window);
3197 driver.commit_space(space);
3198}
3199
3200#[test]
3201fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
3202 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
3203 driver.reset(CompressionLevel::Default);
3204
3205 let large = driver.get_next_space();
3206 let large_capacity = large.capacity();
3207 assert!(large_capacity >= 128 * 1024);
3208 driver.commit_space(large);
3209
3210 driver.set_source_size_hint(1024);
3211 driver.reset(CompressionLevel::Default);
3212
3213 let small = driver.get_next_space();
3214 assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
3215 assert!(
3216 small.capacity() >= large_capacity,
3217 "pooled buffer capacity should be preserved to avoid shrink/grow churn"
3218 );
3219}
3220
3221#[test]
3222fn driver_best_to_fastest_releases_oversized_hc_tables() {
3223 let mut driver = MatchGeneratorDriver::new(32, 2);
3224
3225 driver.reset(CompressionLevel::Best);
3227 assert_eq!(driver.window_size(), (1u64 << 24));
3228
3229 let mut space = driver.get_next_space();
3231 space[..12].copy_from_slice(b"abcabcabcabc");
3232 space.truncate(12);
3233 driver.commit_space(space);
3234 driver.skip_matching();
3235
3236 driver.reset(CompressionLevel::Fastest);
3238 assert_eq!(driver.window_size(), (1u64 << 17));
3239
3240 let hc = driver.hc_match_generator.as_ref().unwrap();
3242 assert!(
3243 hc.hash_table.is_empty(),
3244 "HC hash_table should be released after switching away from Best"
3245 );
3246 assert!(
3247 hc.chain_table.is_empty(),
3248 "HC chain_table should be released after switching away from Best"
3249 );
3250}
3251
3252#[test]
3253fn driver_better_to_best_resizes_hc_tables() {
3254 let mut driver = MatchGeneratorDriver::new(32, 2);
3255
3256 driver.reset(CompressionLevel::Better);
3258 assert_eq!(driver.window_size(), (1u64 << 23));
3259
3260 let mut space = driver.get_next_space();
3261 space[..12].copy_from_slice(b"abcabcabcabc");
3262 space.truncate(12);
3263 driver.commit_space(space);
3264 driver.skip_matching();
3265
3266 let hc = driver.hc_match_generator.as_ref().unwrap();
3267 let better_hash_len = hc.hash_table.len();
3268 let better_chain_len = hc.chain_table.len();
3269
3270 driver.reset(CompressionLevel::Best);
3272 assert_eq!(driver.window_size(), (1u64 << 24));
3273
3274 let mut space = driver.get_next_space();
3276 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
3277 space.truncate(12);
3278 driver.commit_space(space);
3279 driver.skip_matching();
3280
3281 let hc = driver.hc_match_generator.as_ref().unwrap();
3282 assert!(
3283 hc.hash_table.len() > better_hash_len,
3284 "Best hash_table ({}) should be larger than Better ({})",
3285 hc.hash_table.len(),
3286 better_hash_len
3287 );
3288 assert!(
3289 hc.chain_table.len() > better_chain_len,
3290 "Best chain_table ({}) should be larger than Better ({})",
3291 hc.chain_table.len(),
3292 better_chain_len
3293 );
3294}
3295
3296#[test]
3297fn prime_with_dictionary_preserves_history_for_first_full_block() {
3298 let mut driver = MatchGeneratorDriver::new(8, 1);
3299 driver.reset(CompressionLevel::Fastest);
3300
3301 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3302
3303 let mut space = driver.get_next_space();
3304 space.clear();
3305 space.extend_from_slice(b"abcdefgh");
3306 driver.commit_space(space);
3307
3308 let mut saw_match = false;
3309 driver.start_matching(|seq| {
3310 if let Sequence::Triple {
3311 literals,
3312 offset,
3313 match_len,
3314 } = seq
3315 && literals.is_empty()
3316 && offset == 8
3317 && match_len >= MIN_MATCH_LEN
3318 {
3319 saw_match = true;
3320 }
3321 });
3322
3323 assert!(
3324 saw_match,
3325 "first full block should still match dictionary-primed history"
3326 );
3327}
3328
3329#[test]
3330fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
3331 let mut driver = MatchGeneratorDriver::new(8, 1);
3332 driver.reset(CompressionLevel::Fastest);
3333
3334 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3335
3336 let mut space = driver.get_next_space();
3337 space.clear();
3338 space.extend_from_slice(b"abcdefgh");
3339 driver.commit_space(space);
3340
3341 let mut saw_match = false;
3342 driver.start_matching(|seq| {
3343 if let Sequence::Triple {
3344 literals,
3345 offset,
3346 match_len,
3347 } = seq
3348 && literals.is_empty()
3349 && offset == 24
3350 && match_len >= MIN_MATCH_LEN
3351 {
3352 saw_match = true;
3353 }
3354 });
3355
3356 assert!(
3357 saw_match,
3358 "dictionary bytes should remain addressable until frame output exceeds the live window"
3359 );
3360}
3361
3362#[test]
3363fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
3364 let mut driver = MatchGeneratorDriver::new(8, 1);
3365 driver.reset(CompressionLevel::Fastest);
3366
3367 driver.prime_with_dictionary(&[], [11, 7, 3]);
3368
3369 assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
3370}
3371
3372#[test]
3373fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
3374 let mut driver = MatchGeneratorDriver::new(8, 1);
3375 driver.reset(CompressionLevel::Level(2));
3376
3377 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3378
3379 let mut space = driver.get_next_space();
3380 space.clear();
3381 space.extend_from_slice(b"abcdefgh");
3382 driver.commit_space(space);
3383
3384 let mut saw_match = false;
3385 driver.start_matching(|seq| {
3386 if let Sequence::Triple {
3387 literals,
3388 offset,
3389 match_len,
3390 } = seq
3391 && literals.is_empty()
3392 && offset == 8
3393 && match_len >= DFAST_MIN_MATCH_LEN
3394 {
3395 saw_match = true;
3396 }
3397 });
3398
3399 assert!(
3400 saw_match,
3401 "dfast backend should match dictionary-primed history in first full block"
3402 );
3403}
3404
3405#[test]
3406fn prime_with_dictionary_does_not_inflate_reported_window_size() {
3407 let mut driver = MatchGeneratorDriver::new(8, 1);
3408 driver.reset(CompressionLevel::Fastest);
3409
3410 let before = driver.window_size();
3411 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3412 let after = driver.window_size();
3413
3414 assert_eq!(
3415 after, before,
3416 "dictionary retention budget must not change reported frame window size"
3417 );
3418}
3419
3420#[test]
3421fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
3422 let mut driver = MatchGeneratorDriver::new(8, 2);
3423 driver.reset(CompressionLevel::Fastest);
3424
3425 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
3428
3429 assert!(
3430 driver
3431 .match_generator
3432 .window
3433 .iter()
3434 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
3435 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
3436 );
3437}
3438
3439#[test]
3440fn prime_with_dictionary_counts_only_committed_tail_budget() {
3441 let mut driver = MatchGeneratorDriver::new(8, 1);
3442 driver.reset(CompressionLevel::Fastest);
3443
3444 let before = driver.match_generator.max_window_size;
3445 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
3447
3448 assert_eq!(
3449 driver.match_generator.max_window_size,
3450 before + 8,
3451 "retention budget must account only for dictionary bytes actually committed to history"
3452 );
3453}
3454
3455#[test]
3456fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
3457 let mut driver = MatchGeneratorDriver::new(8, 1);
3458 driver.reset(CompressionLevel::Level(2));
3459
3460 let before = driver.dfast_matcher().max_window_size;
3461 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
3464
3465 assert_eq!(
3466 driver.dfast_matcher().max_window_size,
3467 before + 12,
3468 "dfast retention budget should include 4-byte dictionary tails"
3469 );
3470}
3471
3472#[test]
3473fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
3474 let mut driver = MatchGeneratorDriver::new(8, 1);
3475 driver.reset(CompressionLevel::Level(4));
3476
3477 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3478
3479 let mut space = driver.get_next_space();
3480 space.clear();
3481 space.extend_from_slice(b"abcdefgh");
3482 driver.commit_space(space);
3483
3484 let mut saw_match = false;
3485 driver.start_matching(|seq| {
3486 if let Sequence::Triple {
3487 literals,
3488 offset,
3489 match_len,
3490 } = seq
3491 && literals.is_empty()
3492 && offset == 8
3493 && match_len >= ROW_MIN_MATCH_LEN
3494 {
3495 saw_match = true;
3496 }
3497 });
3498
3499 assert!(
3500 saw_match,
3501 "row backend should match dictionary-primed history in first full block"
3502 );
3503}
3504
3505#[test]
3506fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
3507 let mut driver = MatchGeneratorDriver::new(8, 1);
3508 driver.reset(CompressionLevel::Level(4));
3509
3510 let base_window = driver.row_matcher().max_window_size;
3511 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
3514
3515 assert_eq!(
3516 driver.row_matcher().max_window_size,
3517 base_window + 8,
3518 "row retained window must exclude uncommitted 1-byte tail"
3519 );
3520}
3521
3522#[test]
3523fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
3524 let mut driver = MatchGeneratorDriver::new(8, 1);
3525 driver.reset(CompressionLevel::Level(4));
3526 driver.row_matcher_mut().max_window_size = 8;
3528 driver.reported_window_size = 8;
3529
3530 let base_window = driver.row_matcher().max_window_size;
3531 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3532 assert_eq!(driver.row_matcher().max_window_size, base_window + 24);
3533
3534 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3535 let mut space = driver.get_next_space();
3536 space.clear();
3537 space.extend_from_slice(block);
3538 driver.commit_space(space);
3539 driver.skip_matching();
3540 }
3541
3542 assert_eq!(
3543 driver.dictionary_retained_budget, 0,
3544 "dictionary budget should be fully retired once primed dict slices are evicted"
3545 );
3546 assert_eq!(
3547 driver.row_matcher().max_window_size,
3548 base_window,
3549 "retired dictionary budget must not remain reusable for live history"
3550 );
3551}
3552
3553#[test]
3554fn row_get_last_space_and_reset_to_fastest_clears_window() {
3555 let mut driver = MatchGeneratorDriver::new(8, 1);
3556 driver.reset(CompressionLevel::Level(4));
3557
3558 let mut space = driver.get_next_space();
3559 space.clear();
3560 space.extend_from_slice(b"row-data");
3561 driver.commit_space(space);
3562
3563 assert_eq!(driver.get_last_space(), b"row-data");
3564
3565 driver.reset(CompressionLevel::Fastest);
3566 assert_eq!(driver.active_backend, MatcherBackend::Simple);
3567 assert!(driver.row_matcher().window.is_empty());
3568}
3569
3570#[test]
3572fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
3573 let mut driver = MatchGeneratorDriver::new(8, 1);
3574 driver.reset(CompressionLevel::Level(4));
3575 assert_eq!(driver.active_backend, MatcherBackend::Row);
3576
3577 let _ = driver.row_matcher();
3580 let mut space = driver.get_next_space();
3581 space.extend_from_slice(b"row-data-to-recycle");
3582 driver.commit_space(space);
3583
3584 let before_pool = driver.vec_pool.len();
3585 driver.reset(CompressionLevel::Fastest);
3586
3587 assert_eq!(driver.active_backend, MatcherBackend::Simple);
3588 let row = driver
3589 .row_match_generator
3590 .as_ref()
3591 .expect("row matcher should remain allocated after switch");
3592 assert!(row.row_heads.is_empty());
3593 assert!(row.row_positions.is_empty());
3594 assert!(row.row_tags.is_empty());
3595 assert!(
3596 driver.vec_pool.len() >= before_pool,
3597 "row reset should recycle row history buffers"
3598 );
3599}
3600
3601#[test]
3603fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
3604 let mut driver = MatchGeneratorDriver::new(8, 1);
3605 driver.active_backend = MatcherBackend::Row;
3606 driver.row_match_generator = None;
3607
3608 driver.reset(CompressionLevel::Fastest);
3609
3610 assert_eq!(driver.active_backend, MatcherBackend::Simple);
3611}
3612
3613#[test]
3614fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
3615 let mut params = resolve_level_params(CompressionLevel::Level(4), None);
3616 params.window_log = 22;
3617 let adjusted = adjust_params_for_source_size(params, 0);
3618 assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
3619}
3620
3621#[test]
3622fn common_prefix_len_matches_scalar_reference_across_offsets() {
3623 fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
3624 a.iter()
3625 .zip(b.iter())
3626 .take_while(|(lhs, rhs)| lhs == rhs)
3627 .count()
3628 }
3629
3630 for total_len in [
3631 0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
3632 ] {
3633 let base: Vec<u8> = (0..total_len)
3634 .map(|i| ((i * 13 + 7) & 0xFF) as u8)
3635 .collect();
3636
3637 for start in [0usize, 1, 3] {
3638 if start > total_len {
3639 continue;
3640 }
3641 let a = &base[start..];
3642 let b = a.to_vec();
3643 assert_eq!(
3644 MatchGenerator::common_prefix_len(a, &b),
3645 scalar_reference(a, &b),
3646 "equal slices total_len={total_len} start={start}"
3647 );
3648
3649 let len = a.len();
3650 for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
3651 if mismatch >= len {
3652 continue;
3653 }
3654 let mut altered = b.clone();
3655 altered[mismatch] ^= 0x5A;
3656 assert_eq!(
3657 MatchGenerator::common_prefix_len(a, &altered),
3658 scalar_reference(a, &altered),
3659 "total_len={total_len} start={start} mismatch={mismatch}"
3660 );
3661 }
3662
3663 if len > 0 {
3664 let mismatch = len - 1;
3665 let mut altered = b.clone();
3666 altered[mismatch] ^= 0xA5;
3667 assert_eq!(
3668 MatchGenerator::common_prefix_len(a, &altered),
3669 scalar_reference(a, &altered),
3670 "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
3671 );
3672 }
3673 }
3674 }
3675
3676 let long = alloc::vec![0xAB; 320];
3677 let shorter = alloc::vec![0xAB; 137];
3678 assert_eq!(
3679 MatchGenerator::common_prefix_len(&long, &shorter),
3680 scalar_reference(&long, &shorter)
3681 );
3682}
3683
3684#[test]
3685fn row_pick_lazy_returns_none_when_next_is_better() {
3686 let mut matcher = RowMatchGenerator::new(1 << 22);
3687 matcher.configure(ROW_CONFIG);
3688 matcher.add_data(alloc::vec![b'a'; 64], |_| {});
3689 matcher.ensure_tables();
3690
3691 let abs_pos = matcher.history_abs_start + 16;
3692 let best = MatchCandidate {
3693 start: abs_pos,
3694 offset: 8,
3695 match_len: ROW_MIN_MATCH_LEN,
3696 };
3697 assert!(
3698 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
3699 "lazy picker should defer when next position is clearly better"
3700 );
3701}
3702
3703#[test]
3704fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
3705 let mut matcher = RowMatchGenerator::new(1 << 22);
3706 matcher.configure(ROW_CONFIG);
3707 matcher.lazy_depth = 2;
3708 matcher.search_depth = 0;
3709 matcher.offset_hist = [6, 9, 1];
3710
3711 let mut data = alloc::vec![b'x'; 40];
3712 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
3713 matcher.add_data(data, |_| {});
3714 matcher.ensure_tables();
3715
3716 let abs_pos = matcher.history_abs_start + 20;
3717 let best = matcher
3718 .best_match(abs_pos, 0)
3719 .expect("expected baseline repcode match");
3720 assert_eq!(best.offset, 9);
3721 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
3722
3723 if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
3724 assert!(next.match_len <= best.match_len);
3725 }
3726
3727 let next2 = matcher
3728 .best_match(abs_pos + 2, 2)
3729 .expect("expected +2 candidate");
3730 assert!(
3731 next2.match_len > best.match_len + 1,
3732 "+2 candidate must be significantly better for depth-2 lazy skip"
3733 );
3734 assert!(
3735 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
3736 "lazy picker should defer when +2 candidate is significantly better"
3737 );
3738}
3739
3740#[test]
3741fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
3742 let mut matcher = RowMatchGenerator::new(1 << 22);
3743 matcher.configure(ROW_CONFIG);
3744 matcher.lazy_depth = 2;
3745 matcher.search_depth = 0;
3746 matcher.offset_hist = [6, 9, 1];
3747
3748 let mut data = alloc::vec![b'x'; 40];
3749 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
3750 matcher.add_data(data, |_| {});
3751 matcher.ensure_tables();
3752
3753 let abs_pos = matcher.history_abs_start + 20;
3754 let best = matcher
3755 .best_match(abs_pos, 0)
3756 .expect("expected baseline repcode match");
3757 assert_eq!(best.offset, 9);
3758 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
3759
3760 let next2 = matcher
3761 .best_match(abs_pos + 2, 2)
3762 .expect("expected +2 candidate");
3763 assert_eq!(next2.match_len, best.match_len + 1);
3764 let chosen = matcher
3765 .pick_lazy_match(abs_pos, 0, Some(best))
3766 .expect("lazy picker should keep current best");
3767 assert_eq!(chosen.start, best.start);
3768 assert_eq!(chosen.offset, best.offset);
3769 assert_eq!(chosen.match_len, best.match_len);
3770}
3771
3772#[test]
3774fn row_hash_and_row_extracts_high_bits() {
3775 let mut matcher = RowMatchGenerator::new(1 << 22);
3776 matcher.configure(ROW_CONFIG);
3777 matcher.add_data(
3778 alloc::vec![
3779 0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
3780 0x70, 0x80,
3781 ],
3782 |_| {},
3783 );
3784 matcher.ensure_tables();
3785
3786 let pos = matcher.history_abs_start + 8;
3787 let (row, tag) = matcher
3788 .hash_and_row(pos)
3789 .expect("row hash should be available");
3790
3791 let idx = pos - matcher.history_abs_start;
3792 let concat = matcher.live_history();
3793 let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
3794 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
3795 let hash = value.wrapping_mul(PRIME);
3796 let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
3797 let combined = hash >> (u64::BITS as usize - total_bits);
3798 let expected_row =
3799 ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
3800 let expected_tag = combined as u8;
3801
3802 assert_eq!(row, expected_row);
3803 assert_eq!(tag, expected_tag);
3804}
3805
3806#[test]
3807fn row_repcode_skips_candidate_before_history_start() {
3808 let mut matcher = RowMatchGenerator::new(1 << 22);
3809 matcher.configure(ROW_CONFIG);
3810 matcher.history = alloc::vec![b'a'; 20];
3811 matcher.history_start = 0;
3812 matcher.history_abs_start = 10;
3813 matcher.offset_hist = [3, 0, 0];
3814
3815 assert!(matcher.repcode_candidate(12, 1).is_none());
3816}
3817
3818#[test]
3819fn row_repcode_returns_none_when_position_too_close_to_history_end() {
3820 let mut matcher = RowMatchGenerator::new(1 << 22);
3821 matcher.configure(ROW_CONFIG);
3822 matcher.history = b"abcde".to_vec();
3823 matcher.history_start = 0;
3824 matcher.history_abs_start = 0;
3825 matcher.offset_hist = [1, 0, 0];
3826
3827 assert!(matcher.repcode_candidate(4, 1).is_none());
3828}
3829
3830#[test]
3831fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
3832 let mut matcher = RowMatchGenerator::new(1 << 22);
3833 matcher.configure(ROW_CONFIG);
3834 matcher.history = b"abcde".to_vec();
3835 matcher.history_start = 0;
3836 matcher.history_abs_start = 0;
3837
3838 assert!(matcher.row_candidate(0, 0).is_none());
3839}
3840
3841#[test]
3842fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
3843 let mut hc = HcMatchGenerator::new(32);
3844 hc.history = b"abc".to_vec();
3845 hc.history_start = 0;
3846 hc.history_abs_start = 0;
3847 hc.ensure_tables();
3848
3849 let candidates = hc.chain_candidates(0);
3850 assert!(candidates.iter().all(|&pos| pos == usize::MAX));
3851}
3852
3853#[test]
3854fn hc_reset_refills_existing_tables_with_empty_sentinel() {
3855 let mut hc = HcMatchGenerator::new(32);
3856 hc.add_data(b"abcdeabcde".to_vec(), |_| {});
3857 hc.ensure_tables();
3858 assert!(!hc.hash_table.is_empty());
3859 assert!(!hc.chain_table.is_empty());
3860 hc.hash_table.fill(123);
3861 hc.chain_table.fill(456);
3862
3863 hc.reset(|_| {});
3864
3865 assert!(hc.hash_table.iter().all(|&v| v == HC_EMPTY));
3866 assert!(hc.chain_table.iter().all(|&v| v == HC_EMPTY));
3867}
3868
3869#[test]
3870fn hc_start_matching_returns_early_for_empty_current_block() {
3871 let mut hc = HcMatchGenerator::new(32);
3872 hc.add_data(Vec::new(), |_| {});
3873 let mut called = false;
3874 hc.start_matching(|_| called = true);
3875 assert!(!called, "empty current block should not emit sequences");
3876}
3877
3878#[test]
3879fn hc_compact_history_drains_when_threshold_crossed() {
3880 let mut hc = HcMatchGenerator::new(8);
3881 hc.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
3882 hc.history_start = 16;
3883 hc.compact_history();
3884 assert_eq!(hc.history_start, 0);
3885 assert_eq!(hc.history, b"qrstuvwxyz");
3886}
3887
3888#[test]
3889fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
3890 let mut hc = HcMatchGenerator::new(32);
3891 hc.history = b"abcdefghijklmnop".to_vec();
3892 hc.history_abs_start = 0;
3893 hc.position_base = 1;
3894 hc.ensure_tables();
3895 let before_hash = hc.hash_table.clone();
3896 let before_chain = hc.chain_table.clone();
3897
3898 hc.insert_position_no_rebase(0);
3899
3900 assert_eq!(hc.hash_table, before_hash);
3901 assert_eq!(hc.chain_table, before_chain);
3902}
3903
3904#[test]
3905fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
3906 let mut driver = MatchGeneratorDriver::new(8, 1);
3907 driver.reset(CompressionLevel::Fastest);
3908 driver.match_generator.max_window_size = 8;
3911 driver.reported_window_size = 8;
3912
3913 let base_window = driver.match_generator.max_window_size;
3914 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3915 assert_eq!(driver.match_generator.max_window_size, base_window + 24);
3916
3917 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3918 let mut space = driver.get_next_space();
3919 space.clear();
3920 space.extend_from_slice(block);
3921 driver.commit_space(space);
3922 driver.skip_matching();
3923 }
3924
3925 assert_eq!(
3926 driver.dictionary_retained_budget, 0,
3927 "dictionary budget should be fully retired once primed dict slices are evicted"
3928 );
3929 assert_eq!(
3930 driver.match_generator.max_window_size, base_window,
3931 "retired dictionary budget must not remain reusable for live history"
3932 );
3933}
3934
3935#[test]
3936fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
3937 let mut driver = MatchGeneratorDriver::new(8, 1);
3938 driver.reset(CompressionLevel::Level(2));
3939 driver.dfast_matcher_mut().max_window_size = 8;
3942 driver.reported_window_size = 8;
3943
3944 let base_window = driver.dfast_matcher().max_window_size;
3945 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3946 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
3947
3948 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3949 let mut space = driver.get_next_space();
3950 space.clear();
3951 space.extend_from_slice(block);
3952 driver.commit_space(space);
3953 driver.skip_matching();
3954 }
3955
3956 assert_eq!(
3957 driver.dictionary_retained_budget, 0,
3958 "dictionary budget should be fully retired once primed dict slices are evicted"
3959 );
3960 assert_eq!(
3961 driver.dfast_matcher().max_window_size,
3962 base_window,
3963 "retired dictionary budget must not remain reusable for live history"
3964 );
3965}
3966
3967#[test]
3968fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
3969 let mut driver = MatchGeneratorDriver::new(8, 1);
3970 driver.reset(CompressionLevel::Better);
3971
3972 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3973
3974 let mut space = driver.get_next_space();
3975 space.clear();
3976 space.extend_from_slice(b"abcdefgh");
3979 driver.commit_space(space);
3980
3981 let mut saw_match = false;
3982 driver.start_matching(|seq| {
3983 if let Sequence::Triple {
3984 literals,
3985 offset,
3986 match_len,
3987 } = seq
3988 && literals.is_empty()
3989 && offset == 8
3990 && match_len >= HC_MIN_MATCH_LEN
3991 {
3992 saw_match = true;
3993 }
3994 });
3995
3996 assert!(
3997 saw_match,
3998 "hash-chain backend should match dictionary-primed history in first full block"
3999 );
4000}
4001
4002#[test]
4003fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
4004 let mut driver = MatchGeneratorDriver::new(8, 1);
4005 driver.reset(CompressionLevel::Better);
4006 driver.hc_matcher_mut().max_window_size = 8;
4008 driver.reported_window_size = 8;
4009
4010 let base_window = driver.hc_matcher().max_window_size;
4011 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
4012 assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);
4013
4014 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
4015 let mut space = driver.get_next_space();
4016 space.clear();
4017 space.extend_from_slice(block);
4018 driver.commit_space(space);
4019 driver.skip_matching();
4020 }
4021
4022 assert_eq!(
4023 driver.dictionary_retained_budget, 0,
4024 "dictionary budget should be fully retired once primed dict slices are evicted"
4025 );
4026 assert_eq!(
4027 driver.hc_matcher().max_window_size,
4028 base_window,
4029 "retired dictionary budget must not remain reusable for live history"
4030 );
4031}
4032
4033#[test]
4034fn hc_rebases_positions_after_u32_boundary() {
4035 let mut matcher = HcMatchGenerator::new(64);
4036 matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
4037 matcher.ensure_tables();
4038 matcher.position_base = 0;
4039 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
4040 Ok(value) => value,
4041 Err(_) => return,
4042 };
4043 matcher.history_abs_start = history_abs_start;
4046 matcher.skip_matching();
4047 assert_eq!(
4048 matcher.position_base, matcher.history_abs_start,
4049 "rebase should anchor to the oldest live absolute position"
4050 );
4051
4052 assert!(
4053 matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
4054 "HC hash table should still be populated after crossing u32 boundary"
4055 );
4056
4057 let abs_pos = matcher.history_abs_start + 10;
4059 let candidates = matcher.chain_candidates(abs_pos);
4060 assert!(
4061 candidates.iter().any(|candidate| *candidate != usize::MAX),
4062 "chain_candidates should return valid matches after rebase"
4063 );
4064}
4065
4066#[test]
4067fn hc_rebase_rebuilds_only_inserted_prefix() {
4068 let mut matcher = HcMatchGenerator::new(64);
4069 matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
4070 matcher.ensure_tables();
4071 matcher.position_base = 0;
4072 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
4073 Ok(value) => value,
4074 Err(_) => return,
4075 };
4076 matcher.history_abs_start = history_abs_start;
4077 let abs_pos = matcher.history_abs_start + 6;
4078
4079 let mut expected = HcMatchGenerator::new(64);
4080 expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
4081 expected.ensure_tables();
4082 expected.history_abs_start = history_abs_start;
4083 expected.position_base = expected.history_abs_start;
4084 expected.hash_table.fill(HC_EMPTY);
4085 expected.chain_table.fill(HC_EMPTY);
4086 for pos in expected.history_abs_start..abs_pos {
4087 expected.insert_position_no_rebase(pos);
4088 }
4089
4090 matcher.maybe_rebase_positions(abs_pos);
4091
4092 assert_eq!(
4093 matcher.position_base, matcher.history_abs_start,
4094 "rebase should still anchor to the oldest live absolute position"
4095 );
4096 assert_eq!(
4097 matcher.hash_table, expected.hash_table,
4098 "rebase must rebuild only positions already inserted before abs_pos"
4099 );
4100 assert_eq!(
4101 matcher.chain_table, expected.chain_table,
4102 "future positions must not be pre-seeded into HC chains during rebase"
4103 );
4104}
4105
4106#[test]
4107fn suffix_store_with_single_slot_does_not_panic_on_keying() {
4108 let mut suffixes = SuffixStore::with_capacity(1);
4109 suffixes.insert(b"abcde", 0);
4110 assert!(suffixes.contains_key(b"abcde"));
4111 assert_eq!(suffixes.get(b"abcde"), Some(0));
4112}
4113
4114#[test]
4115fn fastest_reset_uses_interleaved_hash_fill_step() {
4116 let mut driver = MatchGeneratorDriver::new(32, 2);
4117
4118 driver.reset(CompressionLevel::Uncompressed);
4119 assert_eq!(driver.match_generator.hash_fill_step, 1);
4120
4121 driver.reset(CompressionLevel::Fastest);
4122 assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);
4123
4124 driver.reset(CompressionLevel::Better);
4127 assert_eq!(driver.active_backend, MatcherBackend::HashChain);
4128 assert_eq!(driver.window_size(), (1u64 << 23));
4129 assert_eq!(driver.hc_matcher().lazy_depth, 2);
4130}
4131
4132#[test]
4133fn simple_matcher_updates_offset_history_after_emitting_match() {
4134 let mut matcher = MatchGenerator::new(64);
4135 matcher.add_data(
4136 b"abcdeabcdeabcde".to_vec(),
4137 SuffixStore::with_capacity(64),
4138 |_, _| {},
4139 );
4140
4141 assert!(matcher.next_sequence(|seq| {
4142 assert_eq!(
4143 seq,
4144 Sequence::Triple {
4145 literals: b"abcde",
4146 offset: 5,
4147 match_len: 10,
4148 }
4149 );
4150 }));
4151 assert_eq!(matcher.offset_hist, [5, 1, 4]);
4152}
4153
4154#[test]
4155fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
4156 let mut matcher = MatchGenerator::new(64);
4157 matcher.add_data(
4158 b"abcdefghijabcdefghij".to_vec(),
4159 SuffixStore::with_capacity(64),
4160 |_, _| {},
4161 );
4162
4163 matcher.suffix_idx = 10;
4164 matcher.last_idx_in_sequence = 10;
4165 matcher.offset_hist = [99, 10, 4];
4166
4167 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
4168 assert_eq!(candidate, Some((10, 10)));
4169}
4170
4171#[test]
4172fn simple_matcher_repcode_can_target_previous_window_entry() {
4173 let mut matcher = MatchGenerator::new(64);
4174 matcher.add_data(
4175 b"abcdefghij".to_vec(),
4176 SuffixStore::with_capacity(64),
4177 |_, _| {},
4178 );
4179 matcher.skip_matching();
4180 matcher.add_data(
4181 b"abcdefghij".to_vec(),
4182 SuffixStore::with_capacity(64),
4183 |_, _| {},
4184 );
4185
4186 matcher.offset_hist = [99, 10, 4];
4187
4188 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
4189 assert_eq!(candidate, Some((10, 10)));
4190}
4191
4192#[test]
4193fn simple_matcher_zero_literal_repcode_checks_rep2() {
4194 let mut matcher = MatchGenerator::new(64);
4195 matcher.add_data(
4196 b"abcdefghijabcdefghij".to_vec(),
4197 SuffixStore::with_capacity(64),
4198 |_, _| {},
4199 );
4200 matcher.suffix_idx = 10;
4201 matcher.last_idx_in_sequence = 10;
4202 matcher.offset_hist = [99, 4, 10];
4204
4205 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
4206 assert_eq!(candidate, Some((10, 10)));
4207}
4208
4209#[test]
4210fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
4211 let mut matcher = MatchGenerator::new(64);
4212 matcher.add_data(
4213 b"abcdefghijabcdefghij".to_vec(),
4214 SuffixStore::with_capacity(64),
4215 |_, _| {},
4216 );
4217 matcher.suffix_idx = 10;
4218 matcher.last_idx_in_sequence = 10;
4219 matcher.offset_hist = [11, 4, 99];
4221
4222 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
4223 assert_eq!(candidate, Some((10, 10)));
4224}
4225
4226#[test]
4227fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
4228 let mut matcher = MatchGenerator::new(64);
4229 matcher.add_data(
4230 b"abcdefghij".to_vec(),
4231 SuffixStore::with_capacity(64),
4232 |_, _| {},
4233 );
4234 matcher.skip_matching();
4235 matcher.add_data(
4236 b"klmnopqrst".to_vec(),
4237 SuffixStore::with_capacity(64),
4238 |_, _| {},
4239 );
4240 matcher.suffix_idx = 3;
4241
4242 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
4243 assert_eq!(candidate, None);
4244}
4245
4246#[test]
4247fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
4248 let mut matcher = MatchGenerator::new(64);
4249 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4250 matcher.add_data(
4251 b"abcdefghijklmnop".to_vec(),
4252 SuffixStore::with_capacity(64),
4253 |_, _| {},
4254 );
4255 matcher.skip_matching();
4256 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
4257
4258 assert!(matcher.next_sequence(|seq| {
4259 assert_eq!(
4260 seq,
4261 Sequence::Triple {
4262 literals: b"",
4263 offset: 15,
4264 match_len: 5,
4265 }
4266 );
4267 }));
4268 assert!(!matcher.next_sequence(|_| {}));
4269}
4270
4271#[test]
4272fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
4273 let mut matcher = MatchGenerator::new(64);
4274 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4275 matcher.add_data(
4276 b"01234abcde".to_vec(),
4277 SuffixStore::with_capacity(64),
4278 |_, _| {},
4279 );
4280 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
4281
4282 let last = matcher.window.last().unwrap();
4283 let tail = &last.data[5..10];
4284 assert_eq!(last.suffixes.get(tail), Some(5));
4285}
4286
4287#[test]
4288fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
4289 let mut matcher = MatchGenerator::new(128);
4290 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4291 matcher.add_data(
4292 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
4293 SuffixStore::with_capacity(1 << 16),
4294 |_, _| {},
4295 );
4296
4297 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
4298
4299 let last = matcher.window.last().unwrap();
4300 let first_key = &last.data[..MIN_MATCH_LEN];
4301 assert_eq!(last.suffixes.get(first_key), None);
4302}
4303
4304#[test]
4305fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
4306 let mut matcher = MatchGenerator::new(128);
4307 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4308 matcher.add_data(
4309 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
4310 SuffixStore::with_capacity(1 << 16),
4311 |_, _| {},
4312 );
4313
4314 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
4315
4316 let last = matcher.window.last().unwrap();
4317 for pos in [0usize, 3, 6, 9, 12] {
4318 let key = &last.data[pos..pos + MIN_MATCH_LEN];
4319 assert_eq!(
4320 last.suffixes.get(key),
4321 Some(pos),
4322 "expected interleaved suffix registration at pos {pos}"
4323 );
4324 }
4325}
4326
4327#[test]
4328fn dfast_skip_matching_handles_window_eviction() {
4329 let mut matcher = DfastMatchGenerator::new(16);
4330
4331 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
4332 matcher.skip_matching();
4333 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
4334 matcher.skip_matching();
4335 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
4336
4337 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
4338 matcher.start_matching(|seq| match seq {
4339 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4340 Sequence::Triple {
4341 literals,
4342 offset,
4343 match_len,
4344 } => {
4345 reconstructed.extend_from_slice(literals);
4346 let start = reconstructed.len() - offset;
4347 for i in 0..match_len {
4348 let byte = reconstructed[start + i];
4349 reconstructed.push(byte);
4350 }
4351 }
4352 });
4353
4354 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
4355}
4356
4357#[test]
4358fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
4359 let mut matcher = DfastMatchGenerator::new(8);
4360
4361 let mut first = Vec::with_capacity(64);
4362 first.extend_from_slice(b"abcdefgh");
4363 matcher.add_data(first, |_| {});
4364
4365 let mut second = Vec::with_capacity(64);
4366 second.extend_from_slice(b"ijklmnop");
4367
4368 let mut observed_evicted_len = None;
4369 matcher.add_data(second, |data| {
4370 observed_evicted_len = Some(data.len());
4371 });
4372
4373 assert_eq!(
4374 observed_evicted_len,
4375 Some(8),
4376 "eviction callback must report evicted byte length, not backing capacity"
4377 );
4378}
4379
4380#[test]
4381fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
4382 let mut matcher = DfastMatchGenerator::new(16);
4383
4384 let mut first = Vec::with_capacity(64);
4385 first.extend_from_slice(b"abcdefgh");
4386 matcher.add_data(first, |_| {});
4387
4388 let mut second = Vec::with_capacity(64);
4389 second.extend_from_slice(b"ijklmnop");
4390 matcher.add_data(second, |_| {});
4391
4392 matcher.max_window_size = 8;
4393
4394 let mut observed_evicted_len = None;
4395 matcher.trim_to_window(|data| {
4396 observed_evicted_len = Some(data.len());
4397 });
4398
4399 assert_eq!(
4400 observed_evicted_len,
4401 Some(8),
4402 "trim callback must report evicted byte length, not backing capacity"
4403 );
4404}
4405
4406#[test]
4407fn dfast_inserts_tail_positions_for_next_block_matching() {
4408 let mut matcher = DfastMatchGenerator::new(1 << 22);
4409
4410 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
4411 let mut history = Vec::new();
4412 matcher.start_matching(|seq| match seq {
4413 Sequence::Literals { literals } => history.extend_from_slice(literals),
4414 Sequence::Triple { .. } => unreachable!("first block should not match history"),
4415 });
4416 assert_eq!(history, b"012345bcdea");
4417
4418 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
4419 let mut saw_first_sequence = false;
4420 matcher.start_matching(|seq| {
4421 assert!(!saw_first_sequence, "expected a single cross-block match");
4422 saw_first_sequence = true;
4423 match seq {
4424 Sequence::Literals { .. } => {
4425 panic!("expected tail-anchored cross-block match before any literals")
4426 }
4427 Sequence::Triple {
4428 literals,
4429 offset,
4430 match_len,
4431 } => {
4432 assert_eq!(literals, b"");
4433 assert_eq!(offset, 5);
4434 assert_eq!(match_len, 11);
4435 let start = history.len() - offset;
4436 for i in 0..match_len {
4437 let byte = history[start + i];
4438 history.push(byte);
4439 }
4440 }
4441 }
4442 });
4443
4444 assert!(
4445 saw_first_sequence,
4446 "expected tail-anchored cross-block match"
4447 );
4448 assert_eq!(history, b"012345bcdeabcdeabcdeab");
4449}
4450
4451#[test]
4452fn fastest_hint_iteration_23_sequences_reconstruct_source() {
4453 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
4454 let mut state = seed;
4455 let mut data = Vec::with_capacity(len);
4456 for _ in 0..len {
4457 state = state
4458 .wrapping_mul(6364136223846793005)
4459 .wrapping_add(1442695040888963407);
4460 data.push((state >> 33) as u8);
4461 }
4462 data
4463 }
4464
4465 let i = 23u64;
4466 let len = (i * 89 % 16384) as usize;
4467 let mut data = generate_data(i, len);
4468 let repeat = data[128..256].to_vec();
4471 data.extend_from_slice(&repeat);
4472 data.extend_from_slice(&repeat);
4473
4474 let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
4475 driver.set_source_size_hint(data.len() as u64);
4476 driver.reset(CompressionLevel::Fastest);
4477 let mut space = driver.get_next_space();
4478 space[..data.len()].copy_from_slice(&data);
4479 space.truncate(data.len());
4480 driver.commit_space(space);
4481
4482 let mut rebuilt = Vec::with_capacity(data.len());
4483 let mut saw_triple = false;
4484 driver.start_matching(|seq| match seq {
4485 Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
4486 Sequence::Triple {
4487 literals,
4488 offset,
4489 match_len,
4490 } => {
4491 saw_triple = true;
4492 rebuilt.extend_from_slice(literals);
4493 assert!(offset > 0, "offset must be non-zero");
4494 assert!(
4495 offset <= rebuilt.len(),
4496 "offset must reference already-produced bytes: offset={} produced={}",
4497 offset,
4498 rebuilt.len()
4499 );
4500 let start = rebuilt.len() - offset;
4501 for idx in 0..match_len {
4502 let b = rebuilt[start + idx];
4503 rebuilt.push(b);
4504 }
4505 }
4506 });
4507
4508 assert!(saw_triple, "fixture must emit at least one match");
4509 assert_eq!(rebuilt, data);
4510}