1use alloc::{boxed::Box, vec::Vec};
4use core::convert::TryInto;
5#[cfg(feature = "hash")]
6use twox_hash::XxHash64;
7
8#[cfg(feature = "hash")]
9use core::hash::Hasher;
10
11use super::{
12 CompressionLevel, Matcher, block_header::BlockHeader, frame_header::FrameHeader, levels::*,
13 match_generator::MatchGeneratorDriver,
14};
15use crate::common::MAX_BLOCK_SIZE;
16use crate::fse::fse_encoder::{FSETable, default_ll_table, default_ml_table, default_of_table};
17
18use crate::io::{Read, Write};
19
/// Zstd frame encoder that pulls uncompressed bytes from a source `R`, finds
/// matches with `M`, and writes one complete frame into a drain `W`.
///
/// A source and a drain must both be installed before `compress` is called.
pub struct FrameCompressor<R: Read, W: Write, M: Matcher> {
    /// Reader supplying the uncompressed input; `None` until `set_source` is called.
    uncompressed_data: Option<R>,
    /// Writer receiving the finished frame; `None` until `set_drain` is called.
    compressed_data: Option<W>,
    /// Level used for the next `compress` call.
    compression_level: CompressionLevel,
    /// Dictionary installed through `set_dictionary`, if any.
    dictionary: Option<crate::decoding::Dictionary>,
    /// Encoder-side entropy tables derived from `dictionary` when it was installed.
    dictionary_entropy_cache: Option<CachedDictionaryEntropy>,
    /// One-shot size hint; `compress` takes it, so it applies to a single frame.
    source_size_hint: Option<u64>,
    /// Matcher plus the entropy/repeat-offset state carried between blocks.
    state: CompressState<M>,
    /// Forwarded verbatim into the frame header's `magicless` field.
    magicless: bool,
    /// Running hash of the uncompressed input; its low 32 bits are appended
    /// to the frame as the content checksum.
    #[cfg(feature = "hash")]
    hasher: XxHash64,
}
56
/// Encoder-side entropy tables converted from a dictionary once, at
/// `set_dictionary` time, so that each `compress` call can clone them into the
/// per-frame state instead of converting again.
#[derive(Clone, Default)]
struct CachedDictionaryEntropy {
    /// Huffman table converted from the dictionary, when the conversion yields one.
    huff: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Literal-lengths FSE table taken from the dictionary.
    ll_previous: Option<PreviousFseTable>,
    /// Match-lengths FSE table taken from the dictionary.
    ml_previous: Option<PreviousFseTable>,
    /// Offsets FSE table taken from the dictionary.
    of_previous: Option<PreviousFseTable>,
}
64
65#[derive(Clone)]
66pub(crate) enum PreviousFseTable {
67 Default,
70 Custom(Box<FSETable>),
71 Rle(u8),
72}
73
74impl PreviousFseTable {
75 pub(crate) fn as_table<'a>(&'a self, default: &'a FSETable) -> Option<&'a FSETable> {
76 match self {
77 Self::Default => Some(default),
78 Self::Custom(table) => Some(table),
79 Self::Rle(_) => None,
80 }
81 }
82}
83
84pub(crate) struct FseTables {
85 pub(crate) ll_default: crate::fse::fse_encoder::FseDefaultTable,
97 pub(crate) ll_previous: Option<PreviousFseTable>,
98 pub(crate) ml_default: crate::fse::fse_encoder::FseDefaultTable,
99 pub(crate) ml_previous: Option<PreviousFseTable>,
100 pub(crate) of_default: crate::fse::fse_encoder::FseDefaultTable,
101 pub(crate) of_previous: Option<PreviousFseTable>,
102}
103
104impl FseTables {
105 pub fn new() -> Self {
106 Self {
107 ll_default: default_ll_table(),
108 ll_previous: None,
109 ml_default: default_ml_table(),
110 ml_previous: None,
111 of_default: default_of_table(),
112 of_previous: None,
113 }
114 }
115
116 #[inline]
123 #[allow(clippy::borrow_deref_ref)]
124 pub(crate) fn ll_default_ref(&self) -> &FSETable {
125 &*self.ll_default
126 }
127
128 #[inline]
130 #[allow(clippy::borrow_deref_ref)]
131 pub(crate) fn ml_default_ref(&self) -> &FSETable {
132 &*self.ml_default
133 }
134
135 #[inline]
137 #[allow(clippy::borrow_deref_ref)]
138 pub(crate) fn of_default_ref(&self) -> &FSETable {
139 &*self.of_default
140 }
141}
142
/// Smallest block size the pre-splitter is allowed to return.
const PRESPLIT_BLOCK_MIN: usize = 3_500;
/// Denominator of the "fingerprints differ" threshold.
const PRESPLIT_THRESHOLD_PENALTY_RATE: u64 = 16;
/// Numerator base of the threshold; the current penalty is added to it.
const PRESPLIT_THRESHOLD_BASE: u64 = PRESPLIT_THRESHOLD_PENALTY_RATE - 2;
/// Starting penalty for the chunk splitter; it shrinks by one per merged chunk.
const PRESPLIT_THRESHOLD_PENALTY: i32 = 3;
/// Granularity (8 KiB) at which the chunk splitter fingerprints a block.
const PRESPLIT_CHUNK_SIZE: usize = 8 * 1024;
/// Largest `hash_log` the fingerprint hash supports.
const PRESPLIT_HASH_LOG_MAX: usize = 10;
/// Number of event slots in a fingerprint.
const PRESPLIT_HASH_TABLE_SIZE: usize = 1 << PRESPLIT_HASH_LOG_MAX;
/// Multiplier for the multiplicative hash of two input bytes.
const PRESPLIT_KNUTH: u32 = 0x9E37_79B9;
/// Bytes sampled at the start, middle and end of a block by the border splitter.
const PRESPLIT_BORDERS_SEGMENT: usize = 512;
156
157#[derive(Clone)]
158struct PreSplitFingerprint {
159 events: [u32; PRESPLIT_HASH_TABLE_SIZE],
160 nb_events: usize,
161}
162
163impl Default for PreSplitFingerprint {
164 fn default() -> Self {
165 Self {
166 events: [0; PRESPLIT_HASH_TABLE_SIZE],
167 nb_events: 0,
168 }
169 }
170}
171
172fn presplit_hash2(bytes: &[u8], hash_log: usize) -> usize {
173 debug_assert!(hash_log >= 8);
174 if hash_log == 8 {
175 return bytes[0] as usize;
176 }
177 debug_assert!(hash_log <= PRESPLIT_HASH_LOG_MAX);
178 let value = u16::from_le_bytes([bytes[0], bytes[1]]) as u32;
179 (value.wrapping_mul(PRESPLIT_KNUTH) >> (32 - hash_log)) as usize
180}
181
182fn presplit_record_fingerprint(
183 fp: &mut PreSplitFingerprint,
184 src: &[u8],
185 sampling_rate: usize,
186 hash_log: usize,
187) {
188 fp.events.fill(0);
189 fp.nb_events = 0;
190 if src.len() < 2 {
191 return;
192 }
193 let limit = src.len() - 1;
194 let mut n = 0usize;
195 while n < limit {
196 fp.events[presplit_hash2(&src[n..], hash_log)] += 1;
197 n += sampling_rate;
198 }
199 fp.nb_events += limit / sampling_rate;
202}
203
204fn presplit_record_byte_histogram(fp: &mut PreSplitFingerprint, src: &[u8]) {
210 fp.events.fill(0);
211 for &b in src {
212 fp.events[b as usize] += 1;
213 }
214 fp.nb_events = src.len();
217}
218
219fn presplit_distance(lhs: &PreSplitFingerprint, rhs: &PreSplitFingerprint, hash_log: usize) -> u64 {
220 let slots = 1usize << hash_log;
221 let mut distance = 0u64;
222 for idx in 0..slots {
223 let left = lhs.events[idx] as i128 * rhs.nb_events as i128;
224 let right = rhs.events[idx] as i128 * lhs.nb_events as i128;
225 distance = distance.saturating_add(left.abs_diff(right) as u64);
226 }
227 distance
228}
229
230fn presplit_fingerprints_differ(
231 reference: &PreSplitFingerprint,
232 new_fp: &PreSplitFingerprint,
233 penalty: i32,
234 hash_log: usize,
235) -> bool {
236 debug_assert!(reference.nb_events > 0);
237 debug_assert!(new_fp.nb_events > 0);
238 let p50 = reference.nb_events as u64 * new_fp.nb_events as u64;
239 let deviation = presplit_distance(reference, new_fp, hash_log);
240 let threshold = p50.saturating_mul(PRESPLIT_THRESHOLD_BASE + penalty as u64)
241 / PRESPLIT_THRESHOLD_PENALTY_RATE;
242 deviation >= threshold
243}
244
245fn presplit_merge_events(acc: &mut PreSplitFingerprint, new_fp: &PreSplitFingerprint) {
246 for idx in 0..PRESPLIT_HASH_TABLE_SIZE {
247 acc.events[idx] = acc.events[idx].saturating_add(new_fp.events[idx]);
248 }
249 acc.nb_events = acc.nb_events.saturating_add(new_fp.nb_events);
250}
251
252fn donor_split_block_by_chunks(block: &[u8], level: usize) -> usize {
253 debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
254 debug_assert!((1..=4).contains(&level));
255 let (sampling_rate, hash_log) = match level - 1 {
256 0 => (43, 8),
257 1 => (11, 9),
258 2 => (5, 10),
259 _ => (1, 10),
260 };
261
262 let mut past = PreSplitFingerprint::default();
263 let mut new_events = PreSplitFingerprint::default();
264 let mut penalty = PRESPLIT_THRESHOLD_PENALTY;
265 presplit_record_fingerprint(
266 &mut past,
267 &block[..PRESPLIT_CHUNK_SIZE],
268 sampling_rate,
269 hash_log,
270 );
271 let mut pos = PRESPLIT_CHUNK_SIZE;
272 while pos <= block.len() - PRESPLIT_CHUNK_SIZE {
273 presplit_record_fingerprint(
274 &mut new_events,
275 &block[pos..pos + PRESPLIT_CHUNK_SIZE],
276 sampling_rate,
277 hash_log,
278 );
279 if presplit_fingerprints_differ(&past, &new_events, penalty, hash_log) {
280 return pos;
281 }
282 presplit_merge_events(&mut past, &new_events);
283 if penalty > 0 {
284 penalty -= 1;
285 }
286 pos += PRESPLIT_CHUNK_SIZE;
287 }
288 block.len()
289}
290
291fn donor_split_block_from_borders(block: &[u8]) -> usize {
299 debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
300 let block_size = block.len();
301 let mut past = PreSplitFingerprint::default();
302 let mut new_fp = PreSplitFingerprint::default();
303 presplit_record_byte_histogram(&mut past, &block[..PRESPLIT_BORDERS_SEGMENT]);
304 presplit_record_byte_histogram(&mut new_fp, &block[block_size - PRESPLIT_BORDERS_SEGMENT..]);
305 if !presplit_fingerprints_differ(&past, &new_fp, 0, 8) {
308 return block_size;
309 }
310
311 let mut middle = PreSplitFingerprint::default();
312 let mid_start = block_size / 2 - PRESPLIT_BORDERS_SEGMENT / 2;
313 presplit_record_byte_histogram(
314 &mut middle,
315 &block[mid_start..mid_start + PRESPLIT_BORDERS_SEGMENT],
316 );
317
318 let dist_from_begin = presplit_distance(&past, &middle, 8);
319 let dist_from_end = presplit_distance(&new_fp, &middle, 8);
320 let min_distance = (PRESPLIT_BORDERS_SEGMENT as u64) * (PRESPLIT_BORDERS_SEGMENT as u64) / 3;
324 if dist_from_begin.abs_diff(dist_from_end) < min_distance {
325 return 64 * 1024;
326 }
327 if dist_from_begin > dist_from_end {
336 32 * 1024
337 } else {
338 96 * 1024
339 }
340}
341
342fn donor_pre_split_level(level: CompressionLevel) -> Option<usize> {
343 match level {
344 CompressionLevel::Level(11..=15) => Some(0),
350 CompressionLevel::Level(16..=22) => Some(4),
354 _ => None,
355 }
356}
357
/// Benchmark hook exposing the raw splitter decision for one full-size block.
///
/// `split_level == 0` runs the border heuristic; 1..=4 run the chunk scan at
/// that effort level.
///
/// # Panics
/// Panics if `block` is not exactly `MAX_BLOCK_SIZE` bytes long or if
/// `split_level` is greater than 4.
#[cfg(feature = "bench_internals")]
pub(crate) fn block_splitter_decision_for_bench(block: &[u8], split_level: usize) -> usize {
    assert_eq!(
        block.len(),
        MAX_BLOCK_SIZE as usize,
        "block_splitter_decision_for_bench expects exactly MAX_BLOCK_SIZE bytes"
    );
    assert!(
        split_level <= 4,
        "block_splitter_decision_for_bench: split_level must be in 0..=4, got {split_level}"
    );
    match split_level {
        0 => donor_split_block_from_borders(block),
        chunk_level => donor_split_block_by_chunks(block, chunk_level),
    }
}
382
383pub(crate) fn donor_optimal_block_size(
384 level: CompressionLevel,
385 block: &[u8],
386 remaining_src_size: usize,
387 block_size_max: usize,
388 savings: i64,
389) -> usize {
390 let Some(split_level) = donor_pre_split_level(level) else {
391 return remaining_src_size.min(block_size_max);
392 };
393 if remaining_src_size < MAX_BLOCK_SIZE as usize || block_size_max < MAX_BLOCK_SIZE as usize {
394 return remaining_src_size.min(block_size_max);
395 }
396 if savings < 3 {
397 return MAX_BLOCK_SIZE as usize;
398 }
399 if block.len() < MAX_BLOCK_SIZE as usize {
400 return remaining_src_size.min(block_size_max);
401 }
402 let raw_split = if split_level == 0 {
407 donor_split_block_from_borders(&block[..MAX_BLOCK_SIZE as usize])
408 } else {
409 donor_split_block_by_chunks(&block[..MAX_BLOCK_SIZE as usize], split_level)
410 };
411 raw_split
412 .max(PRESPLIT_BLOCK_MIN)
413 .min(MAX_BLOCK_SIZE as usize)
414}
415
/// Mutable encoder state shared by all blocks of a frame; `compress` resets
/// it at the start of every frame.
pub(crate) struct CompressState<M: Matcher> {
    /// Match finder that also hands out the input buffers for each block.
    pub(crate) matcher: M,
    /// Huffman table carried over from the previous block (or seeded from a dictionary).
    pub(crate) last_huff_table: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Default and previous FSE tables for the three sequence code types.
    pub(crate) fse_tables: FseTables,
    /// Scratch storage handed to the block encoder.
    pub(crate) block_scratch: crate::encoding::blocks::CompressedBlockScratch,
    /// Repeat-offset history; starts at `[1, 4, 8]` unless a dictionary overrides it.
    pub(crate) offset_hist: [u32; 3],
    /// Strategy derived from the compression level at the start of each frame.
    pub(crate) strategy_tag: crate::encoding::strategy::StrategyTag,
}
443
444impl<R: Read, W: Write> FrameCompressor<R, W, MatchGeneratorDriver> {
445 pub fn new(compression_level: CompressionLevel) -> Self {
447 Self {
448 uncompressed_data: None,
449 compressed_data: None,
450 compression_level,
451 dictionary: None,
452 dictionary_entropy_cache: None,
453 source_size_hint: None,
454 state: CompressState {
455 matcher: MatchGeneratorDriver::new(1024 * 128, 1),
456 last_huff_table: None,
457 fse_tables: FseTables::new(),
458 block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
459 offset_hist: [1, 4, 8],
460 strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
461 compression_level,
462 ),
463 },
464 magicless: false,
465 #[cfg(feature = "hash")]
466 hasher: XxHash64::with_seed(0),
467 }
468 }
469}
470
471impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
472 pub fn new_with_matcher(matcher: M, compression_level: CompressionLevel) -> Self {
474 Self {
475 uncompressed_data: None,
476 compressed_data: None,
477 dictionary: None,
478 dictionary_entropy_cache: None,
479 source_size_hint: None,
480 state: CompressState {
481 matcher,
482 last_huff_table: None,
483 fse_tables: FseTables::new(),
484 block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
485 offset_hist: [1, 4, 8],
486 strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
487 compression_level,
488 ),
489 },
490 compression_level,
491 magicless: false,
492 #[cfg(feature = "hash")]
493 hasher: XxHash64::with_seed(0),
494 }
495 }
496
    /// Sets the `magicless` flag that `compress` copies into the frame header.
    // NOTE(review): presumably `true` makes the header omit the magic number;
    // confirm against `FrameHeader::serialize`.
    pub fn set_magicless(&mut self, magicless: bool) {
        self.magicless = magicless;
    }
506
507 pub fn set_source(&mut self, uncompressed_data: R) -> Option<R> {
511 self.uncompressed_data.replace(uncompressed_data)
512 }
513
514 pub fn set_drain(&mut self, compressed_data: W) -> Option<W> {
518 self.compressed_data.replace(compressed_data)
519 }
520
    /// Records the expected input size for the next `compress` call.
    ///
    /// `compress` consumes the hint: it is forwarded to the matcher, sizes the
    /// initial output buffer, and is a precondition for emitting a
    /// single-segment frame header. It must be set again for each frame.
    pub fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
533
534 pub fn compress(&mut self) {
545 let initial_size_hint = self.source_size_hint;
546 let source_size_hint_known = initial_size_hint.is_some();
547 let use_dictionary_state =
548 !matches!(self.compression_level, CompressionLevel::Uncompressed)
549 && self.state.matcher.supports_dictionary_priming()
550 && self.dictionary.is_some();
551 if let Some(size_hint) = self.source_size_hint.take() {
552 self.state.matcher.set_source_size_hint(size_hint);
555 }
556 self.state.matcher.reset(self.compression_level);
558 self.state.offset_hist = [1, 4, 8];
559 self.state.strategy_tag =
566 crate::encoding::strategy::StrategyTag::for_compression_level(self.compression_level);
567 let cached_entropy = if use_dictionary_state {
568 self.dictionary_entropy_cache.as_ref()
569 } else {
570 None
571 };
572 if use_dictionary_state && let Some(dict) = self.dictionary.as_ref() {
573 self.state.offset_hist = dict.offset_hist;
576 self.state
577 .matcher
578 .prime_with_dictionary(dict.dict_content.as_slice(), dict.offset_hist);
579 }
580 if let Some(cache) = cached_entropy {
581 self.state.last_huff_table.clone_from(&cache.huff);
582 } else {
583 self.state.last_huff_table = None;
584 }
585 if let Some(cache) = cached_entropy {
588 self.state
589 .fse_tables
590 .ll_previous
591 .clone_from(&cache.ll_previous);
592 self.state
593 .fse_tables
594 .ml_previous
595 .clone_from(&cache.ml_previous);
596 self.state
597 .fse_tables
598 .of_previous
599 .clone_from(&cache.of_previous);
600 } else {
601 self.state.fse_tables.ll_previous = None;
602 self.state.fse_tables.ml_previous = None;
603 self.state.fse_tables.of_previous = None;
604 }
605 let ll_entropy = cached_entropy.and_then(|cache| match cache.ll_previous.as_ref() {
606 Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
607 _ => None,
608 });
609 let ml_entropy = cached_entropy.and_then(|cache| match cache.ml_previous.as_ref() {
610 Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
611 _ => None,
612 });
613 let of_entropy = cached_entropy.and_then(|cache| match cache.of_previous.as_ref() {
614 Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
615 _ => None,
616 });
617 self.state.matcher.seed_dictionary_entropy(
618 self.state.last_huff_table.as_ref(),
619 ll_entropy,
620 ml_entropy,
621 of_entropy,
622 );
623 #[cfg(feature = "hash")]
624 {
625 self.hasher = XxHash64::with_seed(0);
626 }
627 let source = self.uncompressed_data.as_mut().unwrap();
628 let drain = self.compressed_data.as_mut().unwrap();
629 let window_size = self.state.matcher.window_size();
630 assert!(
631 window_size != 0,
632 "matcher reported window_size == 0, which is invalid"
633 );
634 const ALL_BLOCKS_TINY_THRESHOLD: u64 = 4 * 1024;
654 const ALL_BLOCKS_SMALL_THRESHOLD: u64 = 64 * 1024;
655 const ALL_BLOCKS_TINY_CAP: usize = 4 * 1024;
656 const ALL_BLOCKS_SMALL_CAP: usize = 16 * 1024;
657 const ALL_BLOCKS_DEFAULT_CAP: usize = 130 * 1024;
658 let initial_all_blocks_cap = match initial_size_hint {
659 Some(h) if h <= ALL_BLOCKS_TINY_THRESHOLD => ALL_BLOCKS_TINY_CAP,
660 Some(h) if h <= ALL_BLOCKS_SMALL_THRESHOLD => ALL_BLOCKS_SMALL_CAP,
661 _ => ALL_BLOCKS_DEFAULT_CAP,
662 };
663 let mut all_blocks: Vec<u8> = Vec::with_capacity(initial_all_blocks_cap);
664 let mut total_uncompressed: u64 = 0;
665 let mut pending_input: Vec<u8> = Vec::new();
666 let mut reached_eof = false;
667 let mut savings = 0i64;
668 loop {
670 let block_capacity = MAX_BLOCK_SIZE as usize;
674 let had_pending = !pending_input.is_empty();
675 let mut uncompressed_data = if had_pending {
676 core::mem::take(&mut pending_input)
677 } else {
678 self.state.matcher.get_next_space()
679 };
680 let mut filled = if had_pending {
681 uncompressed_data.len()
682 } else {
683 0
684 };
685 if uncompressed_data.len() < block_capacity {
686 uncompressed_data.resize(block_capacity, 0);
687 }
688 'read_loop: loop {
689 if reached_eof || filled == block_capacity {
690 break 'read_loop;
691 }
692 let new_bytes = source
693 .read(&mut uncompressed_data[filled..block_capacity])
694 .unwrap();
695 if new_bytes == 0 {
696 reached_eof = true;
697 break 'read_loop;
698 }
699 filled += new_bytes;
700 total_uncompressed += new_bytes as u64;
701 }
702 uncompressed_data.truncate(filled);
703 let mut last_block = reached_eof;
704 let remaining_for_split = if reached_eof {
705 uncompressed_data.len()
706 } else {
707 block_capacity
708 };
709 if !matches!(self.compression_level, CompressionLevel::Uncompressed)
710 && uncompressed_data.len() == block_capacity
711 {
712 let block_len = donor_optimal_block_size(
713 self.compression_level,
714 &uncompressed_data,
715 remaining_for_split,
716 block_capacity,
717 savings,
718 );
719 if block_len < uncompressed_data.len() {
720 pending_input = uncompressed_data.split_off(block_len);
721 if pending_input.capacity() < block_capacity {
728 pending_input.reserve_exact(block_capacity - pending_input.len());
729 }
730 last_block = false;
731 }
732 }
733 #[cfg(feature = "hash")]
735 self.hasher.write(&uncompressed_data);
736 if uncompressed_data.is_empty() {
738 let header = BlockHeader {
739 last_block: true,
740 block_type: crate::blocks::block::BlockType::Raw,
741 block_size: 0,
742 };
743 header.serialize(&mut all_blocks);
744 break;
745 }
746
747 match self.compression_level {
748 CompressionLevel::Uncompressed => {
749 let header = BlockHeader {
750 last_block,
751 block_type: crate::blocks::block::BlockType::Raw,
752 block_size: uncompressed_data.len().try_into().unwrap(),
753 };
754 header.serialize(&mut all_blocks);
755 all_blocks.extend_from_slice(&uncompressed_data);
756 savings +=
757 uncompressed_data.len() as i64 - (3 + uncompressed_data.len()) as i64;
758 }
759 CompressionLevel::Fastest
760 | CompressionLevel::Default
761 | CompressionLevel::Better
762 | CompressionLevel::Best
763 | CompressionLevel::Level(_) => {
764 let before_len = all_blocks.len();
765 let block_len = uncompressed_data.len();
766 compress_block_encoded(
767 &mut self.state,
768 self.compression_level,
769 last_block,
770 uncompressed_data,
771 &mut all_blocks,
772 );
773 savings += block_len as i64 - (all_blocks.len() - before_len) as i64;
774 }
775 }
776 if last_block && pending_input.is_empty() {
777 break;
778 }
779 }
780
781 let single_segment = !use_dictionary_state
785 && source_size_hint_known
786 && total_uncompressed >= 512
787 && total_uncompressed <= window_size;
788 let header = FrameHeader {
789 frame_content_size: Some(total_uncompressed),
790 single_segment,
791 content_checksum: cfg!(feature = "hash"),
792 dictionary_id: if use_dictionary_state {
793 self.dictionary.as_ref().map(|dict| dict.id as u64)
794 } else {
795 None
796 },
797 window_size: if single_segment {
798 None
799 } else {
800 Some(window_size)
801 },
802 magicless: self.magicless,
803 };
804 let mut header_buf: Vec<u8> = Vec::with_capacity(14);
807 header.serialize(&mut header_buf);
808 drain.write_all(&header_buf).unwrap();
809 drain.write_all(&all_blocks).unwrap();
810
811 #[cfg(feature = "hash")]
814 {
815 let content_checksum = self.hasher.finish();
818 drain
819 .write_all(&(content_checksum as u32).to_le_bytes())
820 .unwrap();
821 }
822 }
823
    /// Mutable access to the installed source, or `None` if none is set.
    pub fn source_mut(&mut self) -> Option<&mut R> {
        self.uncompressed_data.as_mut()
    }
828
    /// Mutable access to the installed drain, or `None` if none is set.
    pub fn drain_mut(&mut self) -> Option<&mut W> {
        self.compressed_data.as_mut()
    }
833
    /// Shared access to the installed source, or `None` if none is set.
    pub fn source(&self) -> Option<&R> {
        self.uncompressed_data.as_ref()
    }
838
    /// Shared access to the installed drain, or `None` if none is set.
    pub fn drain(&self) -> Option<&W> {
        self.compressed_data.as_ref()
    }
843
844 pub fn take_source(&mut self) -> Option<R> {
846 self.uncompressed_data.take()
847 }
848
849 pub fn take_drain(&mut self) -> Option<W> {
851 self.compressed_data.take()
852 }
853
854 pub fn replace_matcher(&mut self, mut match_generator: M) -> M {
856 core::mem::swap(&mut match_generator, &mut self.state.matcher);
857 match_generator
858 }
859
860 pub fn set_compression_level(
862 &mut self,
863 compression_level: CompressionLevel,
864 ) -> CompressionLevel {
865 let old = self.compression_level;
866 self.compression_level = compression_level;
867 old
868 }
869
    /// The level that the next `compress` call will use.
    pub fn compression_level(&self) -> CompressionLevel {
        self.compression_level
    }
874
875 pub fn set_dictionary(
882 &mut self,
883 dictionary: crate::decoding::Dictionary,
884 ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
885 {
886 if dictionary.id == 0 {
887 return Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId);
888 }
889 if let Some(index) = dictionary.offset_hist.iter().position(|&rep| rep == 0) {
890 return Err(
891 crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
892 index: index as u8,
893 },
894 );
895 }
896 self.dictionary_entropy_cache = Some(CachedDictionaryEntropy {
897 huff: dictionary.huf.table.to_encoder_table(),
898 ll_previous: dictionary
899 .fse
900 .literal_lengths
901 .to_encoder_table()
902 .map(|table| PreviousFseTable::Custom(Box::new(table))),
903 ml_previous: dictionary
904 .fse
905 .match_lengths
906 .to_encoder_table()
907 .map(|table| PreviousFseTable::Custom(Box::new(table))),
908 of_previous: dictionary
909 .fse
910 .offsets
911 .to_encoder_table()
912 .map(|table| PreviousFseTable::Custom(Box::new(table))),
913 });
914 Ok(self.dictionary.replace(dictionary))
915 }
916
    /// Decodes `raw_dictionary` and installs it through `set_dictionary`.
    ///
    /// # Errors
    /// Returns any error from decoding the bytes, plus the validation errors
    /// that `set_dictionary` reports.
    pub fn set_dictionary_from_bytes(
        &mut self,
        raw_dictionary: &[u8],
    ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
    {
        let dictionary = crate::decoding::Dictionary::decode_dict(raw_dictionary)?;
        self.set_dictionary(dictionary)
    }
926
927 pub fn clear_dictionary(&mut self) -> Option<crate::decoding::Dictionary> {
929 self.dictionary_entropy_cache = None;
930 self.dictionary.take()
931 }
932}
933
934#[cfg(test)]
935mod tests {
936 #[cfg(all(feature = "dict_builder", feature = "std"))]
937 use alloc::format;
938 use alloc::vec;
939
940 use super::FrameCompressor;
941 use crate::blocks::block::BlockType;
942 use crate::common::{MAGIC_NUM, MAX_BLOCK_SIZE};
943 use crate::decoding::{FrameDecoder, block_decoder, frame::read_frame_header};
944 use crate::encoding::{Matcher, Sequence};
945 use alloc::vec::Vec;
946
947 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
948 let mut state = seed;
949 let mut data = Vec::with_capacity(len);
950 for _ in 0..len {
951 state = state
952 .wrapping_mul(6364136223846793005)
953 .wrapping_add(1442695040888963407);
954 data.push((state >> 33) as u8);
955 }
956 data
957 }
958
959 fn first_block_type(frame: &[u8]) -> BlockType {
960 let (_, header_size) = read_frame_header(frame).expect("frame header should parse");
961 let mut decoder = block_decoder::new();
962 let (header, _) = decoder
963 .read_block_header(&frame[header_size as usize..])
964 .expect("block header should parse");
965 header.block_type
966 }
967
968 #[cfg(feature = "std")]
970 #[test]
971 fn fcs_header_written_and_c_zstd_compatible() {
972 let levels = [
973 crate::encoding::CompressionLevel::Uncompressed,
974 crate::encoding::CompressionLevel::Fastest,
975 crate::encoding::CompressionLevel::Default,
976 crate::encoding::CompressionLevel::Better,
977 crate::encoding::CompressionLevel::Best,
978 ];
979 let fcs_2byte = vec![0xCDu8; 300]; let large = vec![0xABu8; 100_000];
981 let inputs: [&[u8]; 5] = [
982 &[],
983 &[0x00],
984 b"abcdefghijklmnopqrstuvwxy\n",
985 &fcs_2byte,
986 &large,
987 ];
988 for level in levels {
989 for data in &inputs {
990 let compressed = crate::encoding::compress_to_vec(*data, level);
991 let header = crate::decoding::frame::read_frame_header(compressed.as_slice())
993 .unwrap()
994 .0;
995 assert_eq!(
996 header.frame_content_size(),
997 data.len() as u64,
998 "FCS mismatch for len={} level={:?}",
999 data.len(),
1000 level,
1001 );
1002 assert_ne!(
1005 header.descriptor.frame_content_size_bytes().unwrap(),
1006 0,
1007 "FCS field must be present for len={} level={:?}",
1008 data.len(),
1009 level,
1010 );
1011 let mut decoded = Vec::new();
1013 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1014 |e| {
1015 panic!(
1016 "C zstd decode failed for len={} level={level:?}: {e}",
1017 data.len()
1018 )
1019 },
1020 );
1021 assert_eq!(
1022 decoded.as_slice(),
1023 *data,
1024 "C zstd roundtrip failed for len={}",
1025 data.len()
1026 );
1027 }
1028 }
1029 }
1030
1031 #[cfg(feature = "std")]
1032 #[test]
1033 fn source_size_hint_fastest_remains_ffi_compatible_small_input() {
1034 let data = vec![0xAB; 2047];
1035 let compressed = {
1036 let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1037 compressor.set_source_size_hint(data.len() as u64);
1038 compressor.set_source(data.as_slice());
1039 let mut out = Vec::new();
1040 compressor.set_drain(&mut out);
1041 compressor.compress();
1042 out
1043 };
1044
1045 let mut decoded = Vec::new();
1046 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1047 assert_eq!(decoded, data);
1048 }
1049
1050 #[cfg(feature = "std")]
1051 #[test]
1052 fn small_hinted_default_frame_uses_single_segment_header() {
1053 let data = generate_data(0xD15E_A5ED, 1024);
1054 let compressed = {
1055 let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
1056 compressor.set_source_size_hint(data.len() as u64);
1057 compressor.set_source(data.as_slice());
1058 let mut out = Vec::new();
1059 compressor.set_drain(&mut out);
1060 compressor.compress();
1061 out
1062 };
1063
1064 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1065 assert!(
1066 frame_header.descriptor.single_segment_flag(),
1067 "small hinted default frames should use single-segment header for Rust/FFI parity"
1068 );
1069 assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1070 let mut decoded = Vec::new();
1071 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded)
1072 .expect("ffi decoder must accept single-segment small hinted default frame");
1073 assert_eq!(decoded, data);
1074 }
1075
1076 #[cfg(feature = "std")]
1077 #[test]
1078 fn small_hinted_numeric_default_levels_use_single_segment_header() {
1079 let data = generate_data(0xA11C_E003, 1024);
1080 for level in [
1081 super::CompressionLevel::Level(0),
1082 super::CompressionLevel::Level(3),
1083 ] {
1084 let compressed = {
1085 let mut compressor = FrameCompressor::new(level);
1086 compressor.set_source_size_hint(data.len() as u64);
1087 compressor.set_source(data.as_slice());
1088 let mut out = Vec::new();
1089 compressor.set_drain(&mut out);
1090 compressor.compress();
1091 out
1092 };
1093
1094 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1095 assert!(
1096 frame_header.descriptor.single_segment_flag(),
1097 "small hinted numeric default level frames should use single-segment header (level={level:?})"
1098 );
1099 assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1100 let mut decoded = Vec::new();
1101 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1102 panic!(
1103 "ffi decoder must accept single-segment small hinted numeric default level frame (level={level:?}): {e}"
1104 )
1105 });
1106 assert_eq!(decoded, data);
1107 }
1108 }
1109
1110 #[cfg(feature = "std")]
1111 #[test]
1112 fn source_size_hint_levels_remain_ffi_compatible_small_inputs_matrix() {
1113 let levels = [
1114 super::CompressionLevel::Fastest,
1115 super::CompressionLevel::Default,
1116 super::CompressionLevel::Better,
1117 super::CompressionLevel::Best,
1118 super::CompressionLevel::Level(-1),
1119 super::CompressionLevel::Level(2),
1120 super::CompressionLevel::Level(3),
1121 super::CompressionLevel::Level(4),
1122 super::CompressionLevel::Level(11),
1123 ];
1124 let sizes = [
1125 511usize, 512, 513, 1023, 1024, 1536, 2047, 2048, 4095, 4096, 8191, 16_384, 16_385,
1126 ];
1127
1128 for (seed_idx, seed) in [11u64, 23, 41].into_iter().enumerate() {
1129 for &size in &sizes {
1130 let data = generate_data(seed + seed_idx as u64, size);
1131 for &level in &levels {
1132 let compressed = {
1133 let mut compressor = FrameCompressor::new(level);
1134 compressor.set_source_size_hint(data.len() as u64);
1135 compressor.set_source(data.as_slice());
1136 let mut out = Vec::new();
1137 compressor.set_drain(&mut out);
1138 compressor.compress();
1139 out
1140 };
1141 if matches!(size, 511 | 512) {
1142 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1143 assert_eq!(
1144 frame_header.descriptor.single_segment_flag(),
1145 size == 512,
1146 "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1147 );
1148 }
1149
1150 let mut decoded = Vec::new();
1151 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1152 |e| {
1153 panic!(
1154 "ffi decode failed with source-size hint: level={level:?} size={size} seed={} err={e}",
1155 seed + seed_idx as u64
1156 )
1157 },
1158 );
1159 assert_eq!(
1160 decoded,
1161 data,
1162 "hinted ffi roundtrip mismatch: level={level:?} size={size} seed={}",
1163 seed + seed_idx as u64
1164 );
1165 }
1166 }
1167 }
1168 }
1169
1170 #[cfg(feature = "std")]
1171 #[test]
1172 fn hinted_levels_use_single_segment_header_symmetrically() {
1173 let levels = [
1174 super::CompressionLevel::Fastest,
1175 super::CompressionLevel::Default,
1176 super::CompressionLevel::Better,
1177 super::CompressionLevel::Best,
1178 super::CompressionLevel::Level(0),
1179 super::CompressionLevel::Level(2),
1180 super::CompressionLevel::Level(3),
1181 super::CompressionLevel::Level(4),
1182 super::CompressionLevel::Level(11),
1183 ];
1184 for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1185 let size = 1024 + seed_idx * 97;
1186 let data = generate_data(seed, size);
1187 for &level in &levels {
1188 let compressed = {
1189 let mut compressor = FrameCompressor::new(level);
1190 compressor.set_source_size_hint(data.len() as u64);
1191 compressor.set_source(data.as_slice());
1192 let mut out = Vec::new();
1193 compressor.set_drain(&mut out);
1194 compressor.compress();
1195 out
1196 };
1197 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1198 assert!(
1199 frame_header.descriptor.single_segment_flag(),
1200 "hinted frame should be single-segment for level={level:?} size={}",
1201 data.len()
1202 );
1203 assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1204 let mut decoded = Vec::new();
1205 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1206 panic!(
1207 "ffi decode failed for hinted single-segment parity: level={level:?} size={} err={e}",
1208 data.len()
1209 )
1210 });
1211 assert_eq!(decoded, data);
1212 }
1213 }
1214 }
1215
/// Pins the hinted single-segment boundary for every listed level: with an
/// exact source-size hint a 511-byte input must not set the single-segment
/// flag, while a 512-byte input must. Each frame is also decoded with the
/// `zstd` crate and compared against the original bytes.
#[cfg(feature = "std")]
#[test]
fn hinted_levels_pin_511_512_single_segment_boundary() {
    use super::CompressionLevel;

    let levels = [
        CompressionLevel::Fastest,
        CompressionLevel::Default,
        CompressionLevel::Better,
        CompressionLevel::Best,
        CompressionLevel::Level(0),
        CompressionLevel::Level(2),
        CompressionLevel::Level(3),
        CompressionLevel::Level(4),
        CompressionLevel::Level(11),
    ];
    let base_seeds = [7u64, 23, 41];

    for (seed_idx, seed) in base_seeds.into_iter().enumerate() {
        // The seed actually fed to the generator is offset by its position.
        let effective_seed = seed + seed_idx as u64;
        for size in [511usize, 512] {
            let data = generate_data(effective_seed, size);
            let expect_single_segment = size == 512;

            for level in levels.iter().copied() {
                let mut compressed: Vec<u8> = Vec::new();
                {
                    let mut compressor = FrameCompressor::new(level);
                    compressor.set_source_size_hint(data.len() as u64);
                    compressor.set_source(data.as_slice());
                    compressor.set_drain(&mut compressed);
                    compressor.compress();
                }

                let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
                assert_eq!(
                    frame_header.descriptor.single_segment_flag(),
                    expect_single_segment,
                    "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
                );

                let mut decoded = Vec::new();
                if let Err(e) = zstd::stream::copy_decode(compressed.as_slice(), &mut decoded) {
                    panic!(
                        "ffi decode failed at single-segment boundary: level={level:?} size={size} seed={} err={e}",
                        effective_seed
                    );
                }
                assert_eq!(decoded, data);
            }
        }
    }
}
1263
/// `Fastest` level: the first block of a frame built from 10 KiB of
/// `generate_data` output must be a raw block, and the frame must decode
/// back to the input through the `zstd` crate.
#[cfg(feature = "std")]
#[test]
fn fastest_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Fastest;
    let input = generate_data(0xC0FF_EE11, 10 * 1024);

    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1277
/// `Default` level: the first block of a frame built from 10 KiB of
/// `generate_data` output must be a raw block, and the frame must decode
/// back to the input through the `zstd` crate.
#[cfg(feature = "std")]
#[test]
fn default_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Default;
    let input = generate_data(0xD15E_A5ED, 10 * 1024);

    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1291
/// `Best` level: the first block of a frame built from 10 KiB of
/// `generate_data` output must be a raw block, and the frame must decode
/// back to the input through the `zstd` crate.
#[cfg(feature = "std")]
#[test]
fn best_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Best;
    let input = generate_data(0xB35C_AFE1, 10 * 1024);

    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1305
/// `Level(2)`: the first block of a frame built from 10 KiB of
/// `generate_data` output must be a raw block, and the frame must decode
/// back to the input through the `zstd` crate.
#[cfg(feature = "std")]
#[test]
fn level2_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Level(2);
    let input = generate_data(0xA11C_E222, 10 * 1024);

    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1319
/// `Better` level: the first block of a frame built from 10 KiB of
/// `generate_data` output must be a raw block, and the frame must decode
/// back to the input through the `zstd` crate.
#[cfg(feature = "std")]
#[test]
fn better_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Better;
    let input = generate_data(0xBE77_E111, 10 * 1024);

    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1333
/// Highly repetitive log-style input (one line repeated and truncated to
/// 16 KiB) must not have a raw first block at any of the checked levels, must
/// come out smaller than the input, and must round-trip through the `zstd`
/// crate.
#[cfg(feature = "std")]
#[test]
fn compressible_logs_do_not_fall_back_to_raw_fast_path() {
    const LINE: &[u8] =
        b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";
    const TOTAL_LEN: usize = 16 * 1024;

    /// Compresses `data` at `level` and checks block type, size and round-trip.
    fn assert_not_raw_for_level(data: &[u8], level: super::CompressionLevel) {
        let compressed = crate::encoding::compress_to_vec(data, level);
        assert_ne!(first_block_type(&compressed), BlockType::Raw);
        assert!(
            compressed.len() < data.len(),
            "compressible input should remain compressible for level={level:?}"
        );
        let mut decoded = Vec::new();
        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
        assert_eq!(decoded, data);
    }

    // Repeat the line endlessly and cut the stream at exactly TOTAL_LEN bytes.
    let data: Vec<u8> = LINE.iter().copied().cycle().take(TOTAL_LEN).collect();

    for level in [
        super::CompressionLevel::Fastest,
        super::CompressionLevel::Default,
        super::CompressionLevel::Level(3),
        super::CompressionLevel::Better,
        super::CompressionLevel::Best,
    ] {
        assert_not_raw_for_level(data.as_slice(), level);
    }
}
1363
/// With an exact source-size hint, a 4 KiB repetitive log payload must yield
/// a single-segment frame at every listed level, must not start with a raw
/// block, must be smaller than the input, and must decode via the `zstd`
/// crate to the original bytes.
#[cfg(feature = "std")]
#[test]
fn hinted_small_compressible_frames_use_single_segment_across_levels() {
    const LINE: &[u8] =
        b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";
    const TOTAL_LEN: usize = 4 * 1024;

    // Repeat the line endlessly and cut the stream at exactly TOTAL_LEN bytes.
    let data: Vec<u8> = LINE.iter().copied().cycle().take(TOTAL_LEN).collect();

    let levels = [
        super::CompressionLevel::Fastest,
        super::CompressionLevel::Default,
        super::CompressionLevel::Better,
        super::CompressionLevel::Best,
        super::CompressionLevel::Level(0),
        super::CompressionLevel::Level(3),
        super::CompressionLevel::Level(4),
        super::CompressionLevel::Level(11),
    ];

    for level in levels {
        let mut compressed: Vec<u8> = Vec::new();
        {
            let mut compressor = FrameCompressor::new(level);
            compressor.set_source_size_hint(data.len() as u64);
            compressor.set_source(data.as_slice());
            compressor.set_drain(&mut compressed);
            compressor.compress();
        }

        let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
        assert!(
            frame_header.descriptor.single_segment_flag(),
            "hinted small compressible frame should use single-segment (level={level:?})"
        );
        assert_ne!(
            first_block_type(&compressed),
            BlockType::Raw,
            "compressible hinted frame should stay off raw fast path (level={level:?})"
        );
        assert!(
            compressed.len() < data.len(),
            "compressible hinted frame should still shrink (level={level:?})"
        );

        let mut decoded = Vec::new();
        if let Err(e) = zstd::stream::copy_decode(compressed.as_slice(), &mut decoded) {
            panic!("ffi decode failed (level={level:?}): {e}");
        }
        assert_eq!(decoded, data);
    }
}
1414
1415 struct NoDictionaryMatcher {
1416 last_space: Vec<u8>,
1417 window_size: u64,
1418 }
1419
1420 impl NoDictionaryMatcher {
1421 fn new(window_size: u64) -> Self {
1422 Self {
1423 last_space: Vec::new(),
1424 window_size,
1425 }
1426 }
1427 }
1428
1429 impl Matcher for NoDictionaryMatcher {
1430 fn get_next_space(&mut self) -> Vec<u8> {
1431 vec![0; self.window_size as usize]
1432 }
1433
1434 fn get_last_space(&mut self) -> &[u8] {
1435 self.last_space.as_slice()
1436 }
1437
1438 fn commit_space(&mut self, space: Vec<u8>) {
1439 self.last_space = space;
1440 }
1441
1442 fn skip_matching(&mut self) {}
1443
1444 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1445 handle_sequence(Sequence::Literals {
1446 literals: self.last_space.as_slice(),
1447 });
1448 }
1449
1450 fn reset(&mut self, _level: super::CompressionLevel) {
1451 self.last_space.clear();
1452 }
1453
1454 fn window_size(&self) -> u64 {
1455 self.window_size
1456 }
1457 }
1458
/// A frame produced at the `Uncompressed` level must begin with the
/// little-endian bytes of `MAGIC_NUM`.
#[test]
fn frame_starts_with_magic_num() {
    let mock_data: &[u8] = &[1, 2, 3];
    let mut output: Vec<u8> = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(mock_data);
    compressor.set_drain(&mut output);
    compressor.compress();

    assert!(output.starts_with(&MAGIC_NUM.to_le_bytes()));
}
1470
/// Smallest raw-compression case: a three-byte input at the `Uncompressed`
/// level must produce a non-empty frame that the crate's own decoder
/// restores to the original bytes.
#[test]
fn very_simple_raw_compress() {
    let mock_data = [1_u8, 2, 3].as_slice();
    let mut output: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(mock_data);
    compressor.set_drain(&mut output);

    compressor.compress();

    // Without these checks the test would only prove that `compress` does
    // not panic; verify that a decodable frame was actually written.
    assert!(
        !output.is_empty(),
        "compressing a non-empty source must emit a frame"
    );
    let mut decoder = FrameDecoder::new();
    let mut decoded = Vec::with_capacity(mock_data.len());
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, mock_data);
}
1481
/// Round-trips a multi-run input (constant-byte runs whose lengths sit on
/// and just below powers of two) at the `Uncompressed` level through both
/// the crate's `FrameDecoder` and the `zstd` crate.
#[test]
fn very_simple_compress() {
    // (byte value, run length) pairs, appended in this order.
    let runs: [(u8, usize); 5] = [
        (0, 1 << 17),
        (1, (1 << 17) - 1),
        (2, (1 << 18) - 1),
        (2, 1 << 17),
        (3, (1 << 17) - 1),
    ];
    let mut mock_data: Vec<u8> = Vec::new();
    for (byte, run_len) in runs {
        let grown_len = mock_data.len() + run_len;
        mock_data.resize(grown_len, byte);
    }

    let mut output: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(mock_data.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    // Crate decoder.
    let mut own_decoder = FrameDecoder::new();
    let mut decoded = Vec::with_capacity(mock_data.len());
    own_decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(mock_data, decoded);

    // `zstd` crate decoder.
    let mut decoded = Vec::new();
    zstd::stream::copy_decode(output.as_slice(), &mut decoded).unwrap();
    assert_eq!(mock_data, decoded);
}
1505
/// Round-trips 512 KiB of zero bytes at the `Uncompressed` level through the
/// crate's own `FrameDecoder`.
#[test]
fn rle_compress() {
    let mock_data = vec![0u8; 1 << 19];

    let mut output: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(mock_data.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    let mut decoded = Vec::with_capacity(mock_data.len());
    let mut own_decoder = FrameDecoder::new();
    own_decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(mock_data, decoded);
}
1521
/// Round-trips a five-byte input at the `Uncompressed` level through both
/// the crate's `FrameDecoder` and the `zstd` crate.
#[test]
fn aaa_compress() {
    let mock_data: Vec<u8> = vec![0, 1, 3, 4, 5];

    let mut output: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(mock_data.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    // Crate decoder.
    let mut own_decoder = FrameDecoder::new();
    let mut decoded = Vec::with_capacity(mock_data.len());
    own_decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(mock_data, decoded);

    // `zstd` crate decoder.
    let mut decoded = Vec::new();
    zstd::stream::copy_decode(output.as_slice(), &mut decoded).unwrap();
    assert_eq!(mock_data, decoded);
}
1541
/// Compressing with a dictionary must record the dictionary id in the frame
/// header, fail to decode when no dictionary is supplied, and round-trip
/// through both the crate's decoder and the `zstd` crate when it is.
///
/// Also checks the return values of the two dictionary setters: the first
/// insert yields `None`, the second yields the dictionary it replaced.
#[test]
fn dictionary_compression_sets_required_dict_id_and_roundtrips() {
    let dict_raw = include_bytes!("../../dict_tests/dictionary");
    let dict_for_encoder = crate::decoding::Dictionary::decode_dict(dict_raw).unwrap();
    let dict_for_decoder = crate::decoding::Dictionary::decode_dict(dict_raw).unwrap();

    // Payload: the first 2048 dictionary content bytes, eight times over.
    let data = dict_for_decoder.dict_content[..2048].repeat(8);

    let mut with_dict = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);

    let first_insert = compressor
        .set_dictionary_from_bytes(dict_raw)
        .expect("dictionary bytes should parse");
    assert!(
        first_insert.is_none(),
        "first dictionary insert should return None"
    );

    let replaced = compressor
        .set_dictionary(dict_for_encoder)
        .expect("valid dictionary should attach")
        .expect("set_dictionary_from_bytes inserted previous dictionary");
    assert_eq!(replaced.id, dict_for_decoder.id);

    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut with_dict);
    compressor.compress();

    let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
        .expect("encoded stream should have a frame header");
    assert_eq!(frame_header.dictionary_id(), Some(dict_for_decoder.id));

    // Decoding without the dictionary must be rejected.
    let mut bare_decoder = FrameDecoder::new();
    let mut missing_dict_target = Vec::with_capacity(data.len());
    let err = bare_decoder
        .decode_all_to_vec(&with_dict, &mut missing_dict_target)
        .unwrap_err();
    assert!(
        matches!(
            &err,
            crate::decoding::errors::FrameDecoderError::DictNotProvided { .. }
        ),
        "dict-compressed stream should require dictionary id, got: {err:?}"
    );

    // Decoding with the dictionary registered must reproduce the payload.
    let mut primed_decoder = FrameDecoder::new();
    primed_decoder.add_dict(dict_for_decoder).unwrap();
    let mut decoded = Vec::with_capacity(data.len());
    primed_decoder
        .decode_all_to_vec(&with_dict, &mut decoded)
        .unwrap();
    assert_eq!(decoded, data);

    // Same check against the `zstd` crate's dictionary decompressor.
    let mut ffi_decoder = zstd::bulk::Decompressor::with_dictionary(dict_raw).unwrap();
    let mut ffi_decoded = Vec::with_capacity(data.len());
    let ffi_written = ffi_decoder
        .decompress_to_buffer(with_dict.as_slice(), &mut ffi_decoded)
        .unwrap();
    assert_eq!(ffi_written, data.len());
    assert_eq!(ffi_decoded, data);
}
1605
/// Trains a raw dictionary with the crate's dict builder, compresses a small
/// related payload with and without it, and checks that the dictionary frame
/// advertises the id, round-trips through the crate's decoder, and is
/// smaller than the no-dictionary baseline.
#[cfg(all(feature = "dict_builder", feature = "std"))]
#[test]
fn dictionary_compression_roundtrips_with_dict_builder_dictionary() {
    use std::io::Cursor;

    let training: Vec<u8> = (0..256u32)
        .flat_map(|idx| format!("tenant=demo table=orders key={idx} region=eu\n").into_bytes())
        .collect();

    let mut raw_dict = Vec::new();
    crate::dictionary::create_raw_dict_from_source(
        Cursor::new(training.as_slice()),
        training.len(),
        &mut raw_dict,
        4096,
    )
    .expect("dict_builder training should succeed");
    assert!(
        !raw_dict.is_empty(),
        "dict_builder produced an empty dictionary"
    );

    // Encoder and decoder each get their own dictionary built from the same
    // raw content and id.
    let dict_id = 0xD1C7_0008;
    let encoder_dict =
        crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();
    let decoder_dict =
        crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();

    let payload: Vec<u8> = (0..96u32)
        .flat_map(|idx| {
            format!("tenant=demo table=orders op=put key={idx} value=aaaaabbbbbcccccdddddeeeee\n")
                .into_bytes()
        })
        .collect();

    // Baseline: same payload and level, no dictionary.
    let mut without_dict = Vec::new();
    {
        let mut baseline = FrameCompressor::new(super::CompressionLevel::Fastest);
        baseline.set_source(payload.as_slice());
        baseline.set_drain(&mut without_dict);
        baseline.compress();
    }

    let mut with_dict = Vec::new();
    {
        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
        compressor
            .set_dictionary(encoder_dict)
            .expect("valid dict_builder dictionary should attach");
        compressor.set_source(payload.as_slice());
        compressor.set_drain(&mut with_dict);
        compressor.compress();
    }

    let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
        .expect("encoded stream should have a frame header");
    assert_eq!(frame_header.dictionary_id(), Some(dict_id));

    let mut decoder = FrameDecoder::new();
    decoder.add_dict(decoder_dict).unwrap();
    let mut decoded = Vec::with_capacity(payload.len());
    decoder.decode_all_to_vec(&with_dict, &mut decoded).unwrap();
    assert_eq!(decoded, payload);

    assert!(
        with_dict.len() < without_dict.len(),
        "trained dictionary should improve compression for this small payload"
    );
}
1674
/// After attaching a dictionary from raw bytes and compressing an empty
/// input, the compressor state must hold a previous Huffman table and
/// previous literal-length, match-length and offset FSE tables.
#[test]
fn set_dictionary_from_bytes_seeds_entropy_tables_for_first_block() {
    let dict_raw = include_bytes!("../../dict_tests/dictionary");
    let input: &[u8] = b"";
    let mut output = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
    let previous = compressor
        .set_dictionary_from_bytes(dict_raw)
        .expect("dictionary bytes should parse");
    assert!(previous.is_none());

    compressor.set_source(input);
    compressor.set_drain(&mut output);
    compressor.compress();

    // Each entry pairs "is this table seeded?" with its failure message;
    // entries are checked in order, so the first missing table is reported.
    let state = &compressor.state;
    let seeded_checks = [
        (
            state.last_huff_table.is_some(),
            "dictionary entropy should seed previous huffman table before first block",
        ),
        (
            state.fse_tables.ll_previous.is_some(),
            "dictionary entropy should seed previous ll table before first block",
        ),
        (
            state.fse_tables.ml_previous.is_some(),
            "dictionary entropy should seed previous ml table before first block",
        ),
        (
            state.fse_tables.of_previous.is_some(),
            "dictionary entropy should seed previous of table before first block",
        ),
    ];
    for (is_seeded, message) in seeded_checks {
        assert!(is_seeded, "{message}");
    }
}
1708
/// `set_dictionary` must refuse a dictionary whose id is zero and report
/// `DictionaryDecodeError::ZeroDictionaryId`.
#[test]
fn set_dictionary_rejects_zero_dictionary_id() {
    use crate::decoding::scratch::{FSEScratch, HuffmanScratch};

    // Valid in every field except the id.
    let zero_id_dict = crate::decoding::Dictionary {
        id: 0,
        fse: FSEScratch::new(),
        huf: HuffmanScratch::new(),
        dict_content: vec![1, 2, 3],
        offset_hist: [1, 4, 8],
    };

    let mut compressor = FrameCompressor::<
        &[u8],
        Vec<u8>,
        crate::encoding::match_generator::MatchGeneratorDriver,
    >::new(super::CompressionLevel::Fastest);

    let result = compressor.set_dictionary(zero_id_dict);
    assert!(matches!(
        result,
        Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId)
    ));
}
1730
/// `set_dictionary` must refuse a dictionary whose first repeat offset is
/// zero and report `ZeroRepeatOffsetInDictionary` with `index: 0`.
#[test]
fn set_dictionary_rejects_zero_repeat_offsets() {
    use crate::decoding::scratch::{FSEScratch, HuffmanScratch};

    // Non-zero id, but the first entry of the offset history is zero.
    let zero_offset_dict = crate::decoding::Dictionary {
        id: 1,
        fse: FSEScratch::new(),
        huf: HuffmanScratch::new(),
        dict_content: vec![1, 2, 3],
        offset_hist: [0, 4, 8],
    };

    let mut compressor = FrameCompressor::<
        &[u8],
        Vec<u8>,
        crate::encoding::match_generator::MatchGeneratorDriver,
    >::new(super::CompressionLevel::Fastest);

    let result = compressor.set_dictionary(zero_offset_dict);
    assert!(matches!(
        result,
        Err(
            crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
                index: 0
            }
        )
    ));
}
1756
/// At the `Uncompressed` level an attached dictionary must not show up in
/// the frame header, and the frame must decode with a decoder that has no
/// dictionary registered.
#[test]
fn uncompressed_mode_does_not_require_dictionary() {
    let payload: &[u8] = b"plain-bytes-that-should-stay-raw";
    let dict_id = 0xABCD_0001;
    let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"shared-history".to_vec())
        .expect("raw dictionary should be valid");

    let mut output = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor
        .set_dictionary(dict)
        .expect("dictionary should attach in uncompressed mode");
    compressor.set_source(payload);
    compressor.set_drain(&mut output);
    compressor.compress();

    let header = crate::decoding::frame::read_frame_header(output.as_slice())
        .expect("encoded frame should have a header")
        .0;
    assert_eq!(
        header.dictionary_id(),
        None,
        "raw/uncompressed frames must not advertise dictionary dependency"
    );

    // No `add_dict` call: decoding must succeed without the dictionary.
    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, payload);
}
1787
/// Compresses a payload longer than the advertised window with a
/// dictionary attached and checks that (a) the advertised window equals the
/// one from an identical no-dictionary run and (b) the frame still decodes
/// to the payload once the dictionary is registered with the decoder.
#[test]
fn dictionary_roundtrip_stays_valid_after_output_exceeds_window() {
    use crate::encoding::match_generator::MatchGeneratorDriver;

    let dict_id = 0xABCD_0002;
    let build_dict = || {
        crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
            .expect("raw dictionary should be valid")
    };
    let dict = build_dict();
    let dict_for_decoder = build_dict();

    let payload = b"abcdefgh".repeat(512 * 1024 / 8 + 64);

    // Baseline run without a dictionary, used only for its window size.
    let mut no_dict_output = Vec::new();
    let baseline_matcher = MatchGeneratorDriver::new(1024, 1);
    let mut no_dict_compressor =
        FrameCompressor::new_with_matcher(baseline_matcher, super::CompressionLevel::Fastest);
    no_dict_compressor.set_source(payload.as_slice());
    no_dict_compressor.set_drain(&mut no_dict_output);
    no_dict_compressor.compress();
    let no_dict_window = crate::decoding::frame::read_frame_header(no_dict_output.as_slice())
        .expect("baseline frame should have a header")
        .0
        .window_size()
        .expect("window size should be present");

    // Same matcher configuration, this time with the dictionary attached.
    let mut output = Vec::new();
    let dict_matcher = MatchGeneratorDriver::new(1024, 1);
    let mut compressor =
        FrameCompressor::new_with_matcher(dict_matcher, super::CompressionLevel::Fastest);
    compressor
        .set_dictionary(dict)
        .expect("dictionary should attach");
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    let advertised_window = crate::decoding::frame::read_frame_header(output.as_slice())
        .expect("encoded frame should have a header")
        .0
        .window_size()
        .expect("window size should be present");
    assert_eq!(
        advertised_window, no_dict_window,
        "dictionary priming must not inflate advertised window size"
    );
    assert!(
        payload.len() > advertised_window as usize,
        "test must cross the advertised window boundary"
    );

    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.add_dict(dict_for_decoder).unwrap();
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, payload);
}
1850
/// With a dictionary attached, a source-size hint of 1 (smaller than the
/// real payload) must not produce a larger advertised window than the
/// unhinted run, and the hinted frame must still decode to the payload.
#[test]
fn source_size_hint_with_dictionary_keeps_roundtrip_and_nonincreasing_window() {
    let dict_id = 0xABCD_0004;
    // Every dictionary in this test has the same id and content.
    let build_dict =
        || crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
    // Reads the window size advertised by an encoded frame's header.
    let advertised_window = |frame: &[u8]| {
        crate::decoding::frame::read_frame_header(frame)
            .expect("encoded frame should have a header")
            .0
            .window_size()
            .expect("window size should be present")
    };

    let dict = build_dict();
    let dict_for_decoder = build_dict();
    let payload = b"abcdabcdabcdabcd".repeat(128);

    let mut hinted_output = Vec::new();
    let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
    hinted.set_dictionary(dict).unwrap();
    hinted.set_source_size_hint(1);
    hinted.set_source(payload.as_slice());
    hinted.set_drain(&mut hinted_output);
    hinted.compress();

    let mut no_hint_output = Vec::new();
    let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
    no_hint.set_dictionary(build_dict()).unwrap();
    no_hint.set_source(payload.as_slice());
    no_hint.set_drain(&mut no_hint_output);
    no_hint.compress();

    let hinted_window = advertised_window(hinted_output.as_slice());
    let no_hint_window = advertised_window(no_hint_output.as_slice());
    assert!(
        hinted_window <= no_hint_window,
        "source-size hint should not increase advertised window with dictionary priming",
    );

    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.add_dict(dict_for_decoder).unwrap();
    decoder
        .decode_all_to_vec(&hinted_output, &mut decoded)
        .unwrap();
    assert_eq!(decoded, payload);
}
1903
/// With a dictionary attached and an exact source-size hint, the advertised
/// window must not exceed the one from the unhinted run, and the hinted
/// frame must still decode to the payload.
#[test]
fn source_size_hint_with_dictionary_keeps_roundtrip_for_larger_payload() {
    let dict_id = 0xABCD_0005;
    // Every dictionary in this test has the same id and content.
    let build_dict =
        || crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
    // Reads the window size advertised by an encoded frame's header.
    let advertised_window = |frame: &[u8]| {
        crate::decoding::frame::read_frame_header(frame)
            .expect("encoded frame should have a header")
            .0
            .window_size()
            .expect("window size should be present")
    };

    let dict = build_dict();
    let dict_for_decoder = build_dict();
    let payload = b"abcd".repeat(1024);
    let payload_len = payload.len() as u64;

    let mut hinted_output = Vec::new();
    let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
    hinted.set_dictionary(dict).unwrap();
    hinted.set_source_size_hint(payload_len);
    hinted.set_source(payload.as_slice());
    hinted.set_drain(&mut hinted_output);
    hinted.compress();

    let mut no_hint_output = Vec::new();
    let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
    no_hint.set_dictionary(build_dict()).unwrap();
    no_hint.set_source(payload.as_slice());
    no_hint.set_drain(&mut no_hint_output);
    no_hint.compress();

    let hinted_window = advertised_window(hinted_output.as_slice());
    let no_hint_window = advertised_window(no_hint_output.as_slice());
    assert!(
        hinted_window <= no_hint_window,
        "source-size hint should not increase advertised window with dictionary priming",
    );

    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.add_dict(dict_for_decoder).unwrap();
    decoder
        .decode_all_to_vec(&hinted_output, &mut decoded)
        .unwrap();
    assert_eq!(decoded, payload);
}
1957
/// When the compressor runs with the test-only `NoDictionaryMatcher`, an
/// attached dictionary must not appear in the frame header, and the frame
/// must decode with a decoder that has no dictionary registered.
#[test]
fn custom_matcher_without_dictionary_priming_does_not_advertise_dict_id() {
    let payload: &[u8] = b"abcdefghabcdefgh";
    let dict_id = 0xABCD_0003;
    let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
        .expect("raw dictionary should be valid");

    let mut output = Vec::new();
    let mut compressor = FrameCompressor::new_with_matcher(
        NoDictionaryMatcher::new(64),
        super::CompressionLevel::Fastest,
    );
    compressor
        .set_dictionary(dict)
        .expect("dictionary should attach");
    compressor.set_source(payload);
    compressor.set_drain(&mut output);
    compressor.compress();

    let header = crate::decoding::frame::read_frame_header(output.as_slice())
        .expect("encoded frame should have a header")
        .0;
    assert_eq!(
        header.dictionary_id(),
        None,
        "matchers that do not support dictionary priming must not advertise dictionary dependency"
    );

    // No `add_dict` call: decoding must succeed without the dictionary.
    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, payload);
}
1989
/// Compresses the same data twice with one reused compressor and checks,
/// for each frame, that it decodes to the input and that the checksum read
/// from the frame equals the one the decoder calculated. Finally the two
/// stored checksums must be equal to each other.
#[cfg(feature = "hash")]
#[test]
fn checksum_two_frames_reused_compressor() {
    /// Fully decodes one frame and returns
    /// `(decoded bytes, checksum stored in the frame, checksum calculated)`.
    fn decode_and_collect(compressed: &[u8]) -> (Vec<u8>, Option<u32>, Option<u32>) {
        let mut remaining = compressed;
        let mut decoder = FrameDecoder::new();
        decoder.reset(&mut remaining).unwrap();
        while !decoder.is_finished() {
            decoder
                .decode_blocks(&mut remaining, crate::decoding::BlockDecodingStrategy::All)
                .unwrap();
        }
        let mut decoded = Vec::new();
        decoder.collect_to_writer(&mut decoded).unwrap();

        let stored = decoder.get_checksum_from_data();
        let calculated = decoder.get_calculated_checksum();
        (decoded, stored, calculated)
    }

    // 1024 bytes cycling through 0..=255.
    let data: Vec<u8> = (0..1024usize).map(|i| i as u8).collect();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);

    let mut compressed1 = Vec::new();
    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut compressed1);
    compressor.compress();

    let mut compressed2 = Vec::new();
    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut compressed2);
    compressor.compress();

    let (decoded1, chksum_from_data1, chksum_calculated1) = decode_and_collect(&compressed1);
    assert_eq!(decoded1, data, "frame 1: decoded data mismatch");
    assert_eq!(
        chksum_from_data1, chksum_calculated1,
        "frame 1: checksum mismatch"
    );

    let (decoded2, chksum_from_data2, chksum_calculated2) = decode_and_collect(&compressed2);
    assert_eq!(decoded2, data, "frame 2: decoded data mismatch");
    assert_eq!(
        chksum_from_data2, chksum_calculated2,
        "frame 2: checksum mismatch"
    );

    assert_eq!(
        chksum_from_data1, chksum_from_data2,
        "frame 1 and frame 2 should have the same checksum (same data, hash must reset per frame)"
    );
}
2053
/// Replays every regular file under `fuzz/artifacts/interop` (if that
/// directory exists) through three interop paths:
///
/// 1. encode with the `zstd` crate, decode with this crate (streaming
///    decoder and block-wise `FrameDecoder`);
/// 2. encode with this crate at `Uncompressed`, decode with `zstd`;
/// 3. encode with this crate at `Fastest`, decode with `zstd`.
///
/// The test does nothing when the directory is absent.
#[cfg(feature = "std")]
#[test]
fn fuzz_targets() {
    use std::io::Read;

    const ARTIFACT_DIR: &str = "fuzz/artifacts/interop";

    /// Decodes a whole stream with this crate's `StreamingDecoder`.
    fn decode_szstd(data: &mut dyn std::io::Read) -> Vec<u8> {
        let mut result: Vec<u8> = Vec::new();
        crate::decoding::StreamingDecoder::new(data)
            .unwrap()
            .read_to_end(&mut result)
            .expect("Decoding failed");
        result
    }

    /// Decodes with `FrameDecoder`, asking for up to 1 MiB per step and
    /// draining the decoder into the result after every step.
    fn decode_szstd_writer(mut data: impl Read) -> Vec<u8> {
        let mut decoder = crate::decoding::FrameDecoder::new();
        decoder.reset(&mut data).unwrap();

        let mut result = Vec::new();
        while !decoder.is_finished() || decoder.can_collect() > 0 {
            let strategy = crate::decoding::BlockDecodingStrategy::UptoBytes(1024 * 1024);
            decoder.decode_blocks(&mut data, strategy).unwrap();
            decoder.collect_to_writer(&mut result).unwrap();
        }
        result
    }

    /// Compresses with the `zstd` crate at level 3.
    fn encode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
        zstd::stream::encode_all(std::io::Cursor::new(data), 3)
    }

    /// Reads `data` to the end and frames it at the `Uncompressed` level.
    fn encode_szstd_uncompressed(data: &mut dyn std::io::Read) -> Vec<u8> {
        use crate::encoding::{CompressionLevel, compress_to_vec};

        let mut input = Vec::new();
        data.read_to_end(&mut input).unwrap();
        compress_to_vec(input.as_slice(), CompressionLevel::Uncompressed)
    }

    /// Reads `data` to the end and compresses it at the `Fastest` level.
    fn encode_szstd_compressed(data: &mut dyn std::io::Read) -> Vec<u8> {
        use crate::encoding::{CompressionLevel, compress_to_vec};

        let mut input = Vec::new();
        data.read_to_end(&mut input).unwrap();
        compress_to_vec(input.as_slice(), CompressionLevel::Fastest)
    }

    /// Decompresses with the `zstd` crate.
    fn decode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
        let mut output = Vec::new();
        zstd::stream::copy_decode(data, &mut output).map(|()| output)
    }

    if !std::fs::exists(ARTIFACT_DIR).unwrap_or(false) {
        return;
    }

    for entry in std::fs::read_dir(ARTIFACT_DIR).unwrap() {
        let entry = entry.unwrap();
        if !entry.file_type().unwrap().is_file() {
            continue;
        }
        let bytes = std::fs::read(entry.path()).unwrap();
        let data = bytes.as_slice();

        // Path 1: `zstd` crate encodes, this crate decodes (two ways).
        let compressed = encode_zstd(data).unwrap();
        let decoded = decode_szstd(&mut compressed.as_slice());
        let decoded2 = decode_szstd_writer(&mut compressed.as_slice());
        assert!(
            decoded == data,
            "Decoded data did not match the original input during decompression"
        );
        assert_eq!(
            decoded2, data,
            "Decoded data did not match the original input during decompression"
        );

        // Path 2: this crate frames uncompressed, `zstd` crate decodes.
        let mut input = data;
        let compressed = encode_szstd_uncompressed(&mut input);
        let decoded = decode_zstd(&compressed).unwrap();
        assert_eq!(
            decoded, data,
            "Decoded data did not match the original input during compression"
        );

        // Path 3: this crate compresses, `zstd` crate decodes.
        let mut input = data;
        let compressed = encode_szstd_compressed(&mut input);
        let decoded = decode_zstd(&compressed).unwrap();
        assert_eq!(
            decoded, data,
            "Decoded data did not match the original input during compression"
        );
    }
}
2149
/// A block of `MAX_BLOCK_SIZE` identical bytes must come back from
/// `donor_split_block_from_borders` with a split point equal to its full
/// length.
#[test]
fn donor_split_block_from_borders_keeps_homogeneous_block() {
    let block_len = MAX_BLOCK_SIZE as usize;
    let block = vec![0xAAu8; block_len];

    let split = super::donor_split_block_from_borders(&block);

    assert_eq!(split, block_len);
}
2161
/// A `MAX_BLOCK_SIZE` block whose first half is zeros and whose second half
/// is a repeating `1..=251` ramp must be split at 64 KiB.
#[test]
fn donor_split_block_from_borders_returns_midpoint_for_centred_transition() {
    let block_len = MAX_BLOCK_SIZE as usize;
    let transition = block_len / 2;

    // Zeros before the transition, `(i % 251 + 1)` from the transition on.
    let block: Vec<u8> = (0..block_len)
        .map(|i| {
            if i < transition {
                0
            } else {
                (i % 251 + 1) as u8
            }
        })
        .collect();

    let split = super::donor_split_block_from_borders(&block);
    assert_eq!(
        split,
        64 * 1024,
        "centred-transition fixture must take the symmetric \
         midpoint arm (`abs_diff < min_distance`), got {split}"
    );
}
2193
/// Pins the value `donor_pre_split_level` returns for a sample of levels:
/// `None` for the named presets checked here and for `Level(7)`, `Some(0)`
/// for levels 11 and 15, and `Some(4)` for levels 16 and 22.
#[test]
fn donor_pre_split_level_dispatches_by_compression_level() {
    use crate::encoding::CompressionLevel;

    // (input level, expected result), checked in order.
    let expectations = [
        (CompressionLevel::Fastest, None),
        (CompressionLevel::Default, None),
        (CompressionLevel::Better, None),
        (CompressionLevel::Level(7), None),
        (CompressionLevel::Level(11), Some(0)),
        (CompressionLevel::Level(15), Some(0)),
        (CompressionLevel::Level(16), Some(4)),
        (CompressionLevel::Level(22), Some(4)),
    ];

    for (level, expected) in expectations {
        assert_eq!(super::donor_pre_split_level(level), expected);
    }
}
2231
#[test]
/// Compresses a 256 KiB two-regime buffer at `Level(13)` and checks that
/// this crate's `FrameDecoder` reproduces it byte for byte.
///
/// The first 128 KiB cycle through `i & 0x07`; the remainder follows
/// `i % 251 + 1`, so the content changes character at the 128 KiB mark.
fn level_13_borders_split_roundtrips_through_own_decoder() {
    use crate::decoding::BlockDecodingStrategy;
    use crate::encoding::CompressionLevel;

    const TOTAL_LEN: usize = 256 * 1024;
    const SWITCH_AT: usize = 128 * 1024;

    let data: Vec<u8> = (0..TOTAL_LEN)
        .map(|i| {
            if i < SWITCH_AT {
                (i & 0x07) as u8
            } else {
                (i % 251 + 1) as u8
            }
        })
        .collect();

    // Encode the whole buffer into an in-memory frame.
    let mut frame = Vec::new();
    let mut compressor = FrameCompressor::new(CompressionLevel::Level(13));
    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut frame);
    compressor.compress();

    // Decode it again, pulling blocks until the decoder reports completion.
    let mut remaining = frame.as_slice();
    let mut decoder = FrameDecoder::new();
    decoder
        .reset(&mut remaining)
        .expect("frame header should parse");
    while !decoder.is_finished() {
        decoder
            .decode_blocks(&mut remaining, BlockDecodingStrategy::All)
            .expect("decode should succeed");
    }

    let mut roundtripped = Vec::with_capacity(data.len());
    decoder.collect_to_writer(&mut roundtripped).unwrap();
    assert_eq!(
        roundtripped, data,
        "roundtrip must reproduce the input verbatim"
    );
}
2272
#[cfg(feature = "std")]
/// Switching the level with `set_compression_level` and then running
/// `compress` must leave `state.strategy_tag` equal to the tag that
/// `StrategyTag::for_compression_level` reports for the new level.
///
/// The fixture also asserts its own preconditions: `Level(20)` resolves to
/// `StrategyTag::BtUltra2`, and the tag after the switch differs from the
/// tag observed right after construction with `Fastest`.
#[test]
fn set_compression_level_then_compress_refreshes_strategy_tag() {
    use super::CompressionLevel;
    use crate::encoding::strategy::StrategyTag;

    let payload = vec![0xABu8; 256];
    let mut sink = Vec::new();

    // Tag as observed immediately after construction.
    let mut compressor = FrameCompressor::new(CompressionLevel::Fastest);
    let tag_at_construction = compressor.state.strategy_tag;
    assert_eq!(
        tag_at_construction,
        StrategyTag::for_compression_level(CompressionLevel::Fastest),
        "construction-time strategy_tag must reflect initial level",
    );

    // Change the level, then run a full compression pass.
    let new_level = CompressionLevel::Level(20);
    compressor.set_compression_level(new_level);
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut sink);
    compressor.compress();

    // `new_tag` and `expected` are captured by name in the message below.
    let new_tag = compressor.state.strategy_tag;
    let expected = StrategyTag::for_compression_level(new_level);
    assert_eq!(
        new_tag, expected,
        "strategy_tag must follow set_compression_level → compress, \
         got {new_tag:?} expected {expected:?}",
    );
    assert_eq!(
        expected,
        StrategyTag::BtUltra2,
        "test fixture invariant: Level(20) must resolve to BtUltra2 \
         so the post-switch tag visibly crosses the band boundary",
    );
    assert_ne!(
        new_tag, tag_at_construction,
        "test fixture invariant: chosen levels must resolve to \
         different StrategyTag variants",
    );
}
2328
#[test]
/// Exercises the magicless frame mode end to end:
///
/// 1. a frame produced with `set_magicless(true)` does not start with the
///    little-endian bytes of `MAGIC_NUM`;
/// 2. a decoder that also has `set_magicless(true)` restores the input;
/// 3. a decoder left in its default mode fails `init` on the same bytes with
///    `ReadFrameHeaderError::BadMagicNumber` or
///    `ReadFrameHeaderError::SkipFrame`.
fn magicless_frame_omits_magic_and_roundtrips() {
    use crate::common::MAGIC_NUM;
    use crate::decoding::errors::{FrameDecoderError, ReadFrameHeaderError};
    use crate::decoding::{BlockDecodingStrategy, FrameDecoder};

    let input: Vec<u8> = (0..512u32).map(|i| (i ^ 0xA5) as u8).collect();

    // Encode with the magic prefix suppressed.
    let mut frame: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
    compressor.set_magicless(true);
    compressor.set_source(input.as_slice());
    compressor.set_drain(&mut frame);
    compressor.compress();

    let magic_prefix = MAGIC_NUM.to_le_bytes();
    assert!(
        !frame.starts_with(&magic_prefix),
        "magicless frame must omit the 4-byte magic prefix",
    );

    // A decoder configured for magicless input must round-trip the payload.
    let mut magicless_decoder = FrameDecoder::new();
    magicless_decoder.set_magicless(true);
    let mut remaining: &[u8] = frame.as_slice();
    magicless_decoder
        .init(&mut remaining)
        .expect("magicless init");
    magicless_decoder
        .decode_blocks(&mut remaining, BlockDecodingStrategy::All)
        .expect("decode_blocks");
    let mut restored: Vec<u8> = Vec::new();
    magicless_decoder
        .collect_to_writer(&mut restored)
        .expect("collect_to_writer");
    assert_eq!(restored, input, "magicless roundtrip must preserve bytes");

    // A default-mode decoder must refuse the same bytes.
    // NOTE(review): `SkipFrame` is presumably accepted because the leading
    // header bytes can happen to look like a skippable-frame magic — confirm.
    let mut std_decoder = FrameDecoder::new();
    match std_decoder.init(frame.as_slice()) {
        Err(FrameDecoderError::ReadFrameHeaderError(
            ReadFrameHeaderError::BadMagicNumber(_) | ReadFrameHeaderError::SkipFrame { .. },
        )) => {}
        other => panic!(
            "standard decoder must reject a magicless frame with \
             ReadFrameHeaderError::BadMagicNumber or SkipFrame, got {other:?}",
        ),
    }
}
2388}