1use alloc::{boxed::Box, vec::Vec};
4use core::convert::TryInto;
5#[cfg(feature = "hash")]
6use twox_hash::XxHash64;
7
8#[cfg(feature = "hash")]
9use core::hash::Hasher;
10
11use super::{
12 CompressionLevel, Matcher, block_header::BlockHeader, frame_header::FrameHeader, levels::*,
13 match_generator::MatchGeneratorDriver,
14};
15use crate::common::MAX_BLOCK_SIZE;
16use crate::fse::fse_encoder::{FSETable, default_ll_table, default_ml_table, default_of_table};
17
18use crate::io::{Read, Write};
19
/// Compresses the bytes read from a source `R` into a single frame written to a drain `W`,
/// using the matcher `M` to generate matches.
///
/// Source and drain are optional at construction time and must both be set
/// (`set_source` / `set_drain`) before `compress` is called.
pub struct FrameCompressor<R: Read, W: Write, M: Matcher> {
    /// Input reader; `compress` unwraps it, so it must be set beforehand.
    uncompressed_data: Option<R>,
    /// Output writer; `compress` unwraps it, so it must be set beforehand.
    compressed_data: Option<W>,
    /// Level applied to every block of the next frame.
    compression_level: CompressionLevel,
    /// Dictionary installed through `set_dictionary`, if any.
    dictionary: Option<crate::decoding::Dictionary>,
    /// Encoder-side entropy tables derived from `dictionary` when it was installed;
    /// cloned into `state` at the start of each `compress` call that uses the dictionary.
    dictionary_entropy_cache: Option<CachedDictionaryEntropy>,
    /// One-shot size hint: taken (and thereby cleared) by the next `compress` call.
    source_size_hint: Option<u64>,
    /// Per-frame encoder state shared with the block encoder.
    state: CompressState<M>,
    /// Copied verbatim into `FrameHeader::magicless` when the header is serialized.
    magicless: bool,
    /// Content hasher: re-seeded at the start of `compress`, fed every block's
    /// uncompressed bytes, and its low 32 bits are appended after the blocks.
    #[cfg(feature = "hash")]
    hasher: XxHash64,
}
56
/// Encoder-form entropy tables extracted from a dictionary by `set_dictionary`.
///
/// Each field is `None` when the corresponding `to_encoder_table()` conversion
/// yielded nothing.
#[derive(Clone, Default)]
struct CachedDictionaryEntropy {
    /// Huffman table converted from `dictionary.huf.table`.
    huff: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Table converted from `dictionary.fse.literal_lengths`.
    ll_previous: Option<PreviousFseTable>,
    /// Table converted from `dictionary.fse.match_lengths`.
    ml_previous: Option<PreviousFseTable>,
    /// Table converted from `dictionary.fse.offsets`.
    of_previous: Option<PreviousFseTable>,
}
64
/// Records which kind of FSE table was in effect previously, so it can be looked up again
/// through [`PreviousFseTable::as_table`].
#[derive(Clone)]
pub(crate) enum PreviousFseTable {
    /// The predefined table; `as_table` resolves it to the default supplied by the caller.
    Default,
    /// A custom table, boxed so the enum itself stays small.
    Custom(Box<FSETable>),
    /// An RLE-coded section; there is no table to hand back (`as_table` returns `None`).
    /// NOTE(review): the payload is presumably the repeated symbol — confirm against the encoder.
    Rle(u8),
}
73
74impl PreviousFseTable {
75 pub(crate) fn as_table<'a>(&'a self, default: &'a FSETable) -> Option<&'a FSETable> {
76 match self {
77 Self::Default => Some(default),
78 Self::Custom(table) => Some(table),
79 Self::Rle(_) => None,
80 }
81 }
82}
83
/// The three FSE table slots used by the block encoder, each paired with the table that was
/// used previously (if any).
///
/// The `ll` / `ml` / `of` prefixes correspond to literal lengths, match lengths and offsets:
/// `set_dictionary` fills the matching `*_previous` cache entries from the dictionary's
/// `literal_lengths`, `match_lengths` and `offsets` tables respectively.
pub(crate) struct FseTables {
    /// Default literal-length table, built by `default_ll_table()`.
    pub(crate) ll_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Literal-length table in effect before the current block; `None` at frame start
    /// unless seeded from a dictionary.
    pub(crate) ll_previous: Option<PreviousFseTable>,
    /// Default match-length table, built by `default_ml_table()`.
    pub(crate) ml_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Match-length counterpart of `ll_previous`.
    pub(crate) ml_previous: Option<PreviousFseTable>,
    /// Default offset table, built by `default_of_table()`.
    pub(crate) of_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Offset counterpart of `ll_previous`.
    pub(crate) of_previous: Option<PreviousFseTable>,
}
103
impl FseTables {
    /// Creates the table set with all three default tables built and no previous tables.
    pub fn new() -> Self {
        Self {
            ll_default: default_ll_table(),
            ll_previous: None,
            ml_default: default_ml_table(),
            ml_previous: None,
            of_default: default_of_table(),
            of_previous: None,
        }
    }

    /// Borrows the default literal-length table as a plain `&FSETable`.
    // The explicit `&*` is kept (and the lint silenced) on purpose.
    // NOTE(review): presumably `FseDefaultTable` is a reference under some configurations and
    // an owning pointer under others — confirm at its definition.
    #[inline]
    #[allow(clippy::borrow_deref_ref)]
    pub(crate) fn ll_default_ref(&self) -> &FSETable {
        &*self.ll_default
    }

    /// Borrows the default match-length table as a plain `&FSETable`.
    #[inline]
    #[allow(clippy::borrow_deref_ref)]
    pub(crate) fn ml_default_ref(&self) -> &FSETable {
        &*self.ml_default
    }

    /// Borrows the default offset table as a plain `&FSETable`.
    #[inline]
    #[allow(clippy::borrow_deref_ref)]
    pub(crate) fn of_default_ref(&self) -> &FSETable {
        &*self.of_default
    }
}
142
// Tuning constants for the block pre-splitting heuristic below.

/// Lower bound applied to any split position returned by `donor_optimal_block_size`.
const PRESPLIT_BLOCK_MIN: usize = 3500;
/// Denominator of the split threshold computed in `presplit_fingerprints_differ`.
const PRESPLIT_THRESHOLD_PENALTY_RATE: u64 = 16;
/// Numerator of the split threshold before the per-call penalty is added.
const PRESPLIT_THRESHOLD_BASE: u64 = PRESPLIT_THRESHOLD_PENALTY_RATE - 2;
/// Initial penalty used by `donor_split_block_by_chunks`; it drops by one after every
/// merged chunk until it reaches zero.
const PRESPLIT_THRESHOLD_PENALTY: i32 = 3;
/// Chunk length (8 KiB) compared by `donor_split_block_by_chunks`.
const PRESPLIT_CHUNK_SIZE: usize = 8 << 10;
/// Largest supported hash width, in bits, for fingerprint slots.
const PRESPLIT_HASH_LOG_MAX: usize = 10;
/// Number of slots in a fingerprint histogram (`2^PRESPLIT_HASH_LOG_MAX`).
const PRESPLIT_HASH_TABLE_SIZE: usize = 1 << PRESPLIT_HASH_LOG_MAX;
/// Multiplier used by `presplit_hash2` to spread two-byte values over the hash slots.
const PRESPLIT_KNUTH: u32 = 0x9E37_79B9;
/// Length of each segment (start, middle, end) sampled by `donor_split_block_from_borders`.
const PRESPLIT_BORDERS_SEGMENT: usize = 512;
156
/// Histogram of hashed events over a byte range, used to compare how similar two ranges are.
#[derive(Clone)]
struct PreSplitFingerprint {
    /// Event count per hash slot; only the first `1 << hash_log` slots are meaningful for a
    /// given comparison.
    events: [u32; PRESPLIT_HASH_TABLE_SIZE],
    /// Event total as stored by the recording function (see each recorder for how it is
    /// derived).
    nb_events: usize,
}
162
163impl Default for PreSplitFingerprint {
164 fn default() -> Self {
165 Self {
166 events: [0; PRESPLIT_HASH_TABLE_SIZE],
167 nb_events: 0,
168 }
169 }
170}
171
172fn presplit_hash2(bytes: &[u8], hash_log: usize) -> usize {
173 debug_assert!(hash_log >= 8);
174 if hash_log == 8 {
175 return bytes[0] as usize;
176 }
177 debug_assert!(hash_log <= PRESPLIT_HASH_LOG_MAX);
178 let value = u16::from_le_bytes([bytes[0], bytes[1]]) as u32;
179 (value.wrapping_mul(PRESPLIT_KNUTH) >> (32 - hash_log)) as usize
180}
181
182fn presplit_record_fingerprint(
183 fp: &mut PreSplitFingerprint,
184 src: &[u8],
185 sampling_rate: usize,
186 hash_log: usize,
187) {
188 fp.events.fill(0);
189 fp.nb_events = 0;
190 if src.len() < 2 {
191 return;
192 }
193 let limit = src.len() - 1;
194 let mut n = 0usize;
195 while n < limit {
196 fp.events[presplit_hash2(&src[n..], hash_log)] += 1;
197 n += sampling_rate;
198 }
199 fp.nb_events += limit / sampling_rate;
202}
203
204fn presplit_record_byte_histogram(fp: &mut PreSplitFingerprint, src: &[u8]) {
210 fp.events.fill(0);
211 for &b in src {
212 fp.events[b as usize] += 1;
213 }
214 fp.nb_events = src.len();
217}
218
219fn presplit_distance(lhs: &PreSplitFingerprint, rhs: &PreSplitFingerprint, hash_log: usize) -> u64 {
220 let slots = 1usize << hash_log;
221 let mut distance = 0u64;
222 for idx in 0..slots {
223 let left = lhs.events[idx] as i128 * rhs.nb_events as i128;
224 let right = rhs.events[idx] as i128 * lhs.nb_events as i128;
225 distance = distance.saturating_add(left.abs_diff(right) as u64);
226 }
227 distance
228}
229
230fn presplit_fingerprints_differ(
231 reference: &PreSplitFingerprint,
232 new_fp: &PreSplitFingerprint,
233 penalty: i32,
234 hash_log: usize,
235) -> bool {
236 debug_assert!(reference.nb_events > 0);
237 debug_assert!(new_fp.nb_events > 0);
238 let p50 = reference.nb_events as u64 * new_fp.nb_events as u64;
239 let deviation = presplit_distance(reference, new_fp, hash_log);
240 let threshold = p50.saturating_mul(PRESPLIT_THRESHOLD_BASE + penalty as u64)
241 / PRESPLIT_THRESHOLD_PENALTY_RATE;
242 deviation >= threshold
243}
244
245fn presplit_merge_events(acc: &mut PreSplitFingerprint, new_fp: &PreSplitFingerprint) {
246 for idx in 0..PRESPLIT_HASH_TABLE_SIZE {
247 acc.events[idx] = acc.events[idx].saturating_add(new_fp.events[idx]);
248 }
249 acc.nb_events = acc.nb_events.saturating_add(new_fp.nb_events);
250}
251
252fn donor_split_block_by_chunks(block: &[u8], level: usize) -> usize {
253 debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
254 debug_assert!((1..=4).contains(&level));
255 let (sampling_rate, hash_log) = match level - 1 {
256 0 => (43, 8),
257 1 => (11, 9),
258 2 => (5, 10),
259 _ => (1, 10),
260 };
261
262 let mut past = PreSplitFingerprint::default();
263 let mut new_events = PreSplitFingerprint::default();
264 let mut penalty = PRESPLIT_THRESHOLD_PENALTY;
265 presplit_record_fingerprint(
266 &mut past,
267 &block[..PRESPLIT_CHUNK_SIZE],
268 sampling_rate,
269 hash_log,
270 );
271 let mut pos = PRESPLIT_CHUNK_SIZE;
272 while pos <= block.len() - PRESPLIT_CHUNK_SIZE {
273 presplit_record_fingerprint(
274 &mut new_events,
275 &block[pos..pos + PRESPLIT_CHUNK_SIZE],
276 sampling_rate,
277 hash_log,
278 );
279 if presplit_fingerprints_differ(&past, &new_events, penalty, hash_log) {
280 return pos;
281 }
282 presplit_merge_events(&mut past, &new_events);
283 if penalty > 0 {
284 penalty -= 1;
285 }
286 pos += PRESPLIT_CHUNK_SIZE;
287 }
288 block.len()
289}
290
291fn donor_split_block_from_borders(block: &[u8]) -> usize {
299 debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
300 let block_size = block.len();
301 let mut past = PreSplitFingerprint::default();
302 let mut new_fp = PreSplitFingerprint::default();
303 presplit_record_byte_histogram(&mut past, &block[..PRESPLIT_BORDERS_SEGMENT]);
304 presplit_record_byte_histogram(&mut new_fp, &block[block_size - PRESPLIT_BORDERS_SEGMENT..]);
305 if !presplit_fingerprints_differ(&past, &new_fp, 0, 8) {
308 return block_size;
309 }
310
311 let mut middle = PreSplitFingerprint::default();
312 let mid_start = block_size / 2 - PRESPLIT_BORDERS_SEGMENT / 2;
313 presplit_record_byte_histogram(
314 &mut middle,
315 &block[mid_start..mid_start + PRESPLIT_BORDERS_SEGMENT],
316 );
317
318 let dist_from_begin = presplit_distance(&past, &middle, 8);
319 let dist_from_end = presplit_distance(&new_fp, &middle, 8);
320 let min_distance = (PRESPLIT_BORDERS_SEGMENT as u64) * (PRESPLIT_BORDERS_SEGMENT as u64) / 3;
324 if dist_from_begin.abs_diff(dist_from_end) < min_distance {
325 return 64 * 1024;
326 }
327 if dist_from_begin > dist_from_end {
336 32 * 1024
337 } else {
338 96 * 1024
339 }
340}
341
342fn donor_pre_split_level(level: CompressionLevel) -> Option<usize> {
343 match level {
344 CompressionLevel::Level(11..=15) => Some(0),
350 CompressionLevel::Level(16..=22) => Some(4),
354 _ => None,
355 }
356}
357
358pub(crate) fn donor_optimal_block_size(
359 level: CompressionLevel,
360 block: &[u8],
361 remaining_src_size: usize,
362 block_size_max: usize,
363 savings: i64,
364) -> usize {
365 let Some(split_level) = donor_pre_split_level(level) else {
366 return remaining_src_size.min(block_size_max);
367 };
368 if remaining_src_size < MAX_BLOCK_SIZE as usize || block_size_max < MAX_BLOCK_SIZE as usize {
369 return remaining_src_size.min(block_size_max);
370 }
371 if savings < 3 {
372 return MAX_BLOCK_SIZE as usize;
373 }
374 if block.len() < MAX_BLOCK_SIZE as usize {
375 return remaining_src_size.min(block_size_max);
376 }
377 let raw_split = if split_level == 0 {
382 donor_split_block_from_borders(&block[..MAX_BLOCK_SIZE as usize])
383 } else {
384 donor_split_block_by_chunks(&block[..MAX_BLOCK_SIZE as usize], split_level)
385 };
386 raw_split
387 .max(PRESPLIT_BLOCK_MIN)
388 .min(MAX_BLOCK_SIZE as usize)
389}
390
/// Mutable encoder state carried from block to block within a frame.
///
/// `FrameCompressor::compress` re-initialises the huffman table, previous FSE tables,
/// offset history and strategy tag at the start of every frame.
pub(crate) struct CompressState<M: Matcher> {
    /// Match generator used to produce sequences for each block.
    pub(crate) matcher: M,
    /// Huffman table carried over from earlier work; reset at frame start to the dictionary's
    /// table (when a dictionary is in use) or to `None`.
    pub(crate) last_huff_table: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Default and previously used FSE tables.
    pub(crate) fse_tables: FseTables,
    /// Scratch storage for the block encoder.
    /// NOTE(review): presumably reused across blocks to avoid reallocation — confirm.
    pub(crate) block_scratch: crate::encoding::blocks::CompressedBlockScratch,
    /// Repeat-offset history; starts each frame at `[1, 4, 8]` or at the dictionary's history.
    pub(crate) offset_hist: [u32; 3],
    /// Strategy tag derived from the compression level via
    /// `StrategyTag::for_compression_level`.
    pub(crate) strategy_tag: crate::encoding::strategy::StrategyTag,
}
418
419impl<R: Read, W: Write> FrameCompressor<R, W, MatchGeneratorDriver> {
420 pub fn new(compression_level: CompressionLevel) -> Self {
422 Self {
423 uncompressed_data: None,
424 compressed_data: None,
425 compression_level,
426 dictionary: None,
427 dictionary_entropy_cache: None,
428 source_size_hint: None,
429 state: CompressState {
430 matcher: MatchGeneratorDriver::new(1024 * 128, 1),
431 last_huff_table: None,
432 fse_tables: FseTables::new(),
433 block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
434 offset_hist: [1, 4, 8],
435 strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
436 compression_level,
437 ),
438 },
439 magicless: false,
440 #[cfg(feature = "hash")]
441 hasher: XxHash64::with_seed(0),
442 }
443 }
444}
445
446impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
447 pub fn new_with_matcher(matcher: M, compression_level: CompressionLevel) -> Self {
449 Self {
450 uncompressed_data: None,
451 compressed_data: None,
452 dictionary: None,
453 dictionary_entropy_cache: None,
454 source_size_hint: None,
455 state: CompressState {
456 matcher,
457 last_huff_table: None,
458 fse_tables: FseTables::new(),
459 block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
460 offset_hist: [1, 4, 8],
461 strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
462 compression_level,
463 ),
464 },
465 compression_level,
466 magicless: false,
467 #[cfg(feature = "hash")]
468 hasher: XxHash64::with_seed(0),
469 }
470 }
471
    /// Sets the `magicless` flag that `compress` copies into the frame header.
    pub fn set_magicless(&mut self, magicless: bool) {
        self.magicless = magicless;
    }
481
482 pub fn set_source(&mut self, uncompressed_data: R) -> Option<R> {
486 self.uncompressed_data.replace(uncompressed_data)
487 }
488
489 pub fn set_drain(&mut self, compressed_data: W) -> Option<W> {
493 self.compressed_data.replace(compressed_data)
494 }
495
    /// Stores a size hint (in bytes) for the next frame.
    ///
    /// The hint is one-shot: `compress` takes it, forwards it to the matcher, and uses it to
    /// size its output buffer and to decide whether a single-segment header is allowed.
    pub fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
508
    /// Reads the source to its end and writes exactly one frame to the drain.
    ///
    /// All blocks are buffered in memory first, because the frame header is serialized only
    /// after the loop, once the total uncompressed size is known. The output order is: frame
    /// header, all blocks, then (with the `hash` feature) the low 32 bits of the content hash.
    ///
    /// # Panics
    ///
    /// * if no source or no drain has been set,
    /// * if the matcher reports a window size of zero,
    /// * if reading from the source or writing to the drain fails (the results are unwrapped).
    pub fn compress(&mut self) {
        // Snapshot the hint before it is consumed below; it still drives buffer sizing and the
        // single-segment decision at the end.
        let initial_size_hint = self.source_size_hint;
        let source_size_hint_known = initial_size_hint.is_some();
        // A dictionary is only used for compressed levels, and only if the matcher can be primed.
        let use_dictionary_state =
            !matches!(self.compression_level, CompressionLevel::Uncompressed)
                && self.state.matcher.supports_dictionary_priming()
                && self.dictionary.is_some();
        // The hint is handed to the matcher before `reset`, and cleared so it applies once.
        if let Some(size_hint) = self.source_size_hint.take() {
            self.state.matcher.set_source_size_hint(size_hint);
        }
        self.state.matcher.reset(self.compression_level);
        // Start every frame from the initial repeat-offset history and the level's strategy.
        self.state.offset_hist = [1, 4, 8];
        self.state.strategy_tag =
            crate::encoding::strategy::StrategyTag::for_compression_level(self.compression_level);
        let cached_entropy = if use_dictionary_state {
            self.dictionary_entropy_cache.as_ref()
        } else {
            None
        };
        if use_dictionary_state && let Some(dict) = self.dictionary.as_ref() {
            // The dictionary overrides the offset history and primes the matcher's window.
            self.state.offset_hist = dict.offset_hist;
            self.state
                .matcher
                .prime_with_dictionary(dict.dict_content.as_slice(), dict.offset_hist);
        }
        // Seed (or clear) the entropy tables carried between blocks.
        if let Some(cache) = cached_entropy {
            self.state.last_huff_table.clone_from(&cache.huff);
        } else {
            self.state.last_huff_table = None;
        }
        if let Some(cache) = cached_entropy {
            self.state
                .fse_tables
                .ll_previous
                .clone_from(&cache.ll_previous);
            self.state
                .fse_tables
                .ml_previous
                .clone_from(&cache.ml_previous);
            self.state
                .fse_tables
                .of_previous
                .clone_from(&cache.of_previous);
        } else {
            self.state.fse_tables.ll_previous = None;
            self.state.fse_tables.ml_previous = None;
            self.state.fse_tables.of_previous = None;
        }
        // Only custom tables are forwarded to the matcher; default/RLE entries become `None`.
        let ll_entropy = cached_entropy.and_then(|cache| match cache.ll_previous.as_ref() {
            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
            _ => None,
        });
        let ml_entropy = cached_entropy.and_then(|cache| match cache.ml_previous.as_ref() {
            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
            _ => None,
        });
        let of_entropy = cached_entropy.and_then(|cache| match cache.of_previous.as_ref() {
            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
            _ => None,
        });
        self.state.matcher.seed_dictionary_entropy(
            self.state.last_huff_table.as_ref(),
            ll_entropy,
            ml_entropy,
            of_entropy,
        );
        // Fresh hasher per frame so a reused compressor does not mix content from earlier frames.
        #[cfg(feature = "hash")]
        {
            self.hasher = XxHash64::with_seed(0);
        }
        let source = self.uncompressed_data.as_mut().unwrap();
        let drain = self.compressed_data.as_mut().unwrap();
        let window_size = self.state.matcher.window_size();
        assert!(
            window_size != 0,
            "matcher reported window_size == 0, which is invalid"
        );
        // Initial capacity of the block buffer, scaled down for small hinted inputs.
        const ALL_BLOCKS_TINY_THRESHOLD: u64 = 4 * 1024;
        const ALL_BLOCKS_SMALL_THRESHOLD: u64 = 64 * 1024;
        const ALL_BLOCKS_TINY_CAP: usize = 4 * 1024;
        const ALL_BLOCKS_SMALL_CAP: usize = 16 * 1024;
        const ALL_BLOCKS_DEFAULT_CAP: usize = 130 * 1024;
        let initial_all_blocks_cap = match initial_size_hint {
            Some(h) if h <= ALL_BLOCKS_TINY_THRESHOLD => ALL_BLOCKS_TINY_CAP,
            Some(h) if h <= ALL_BLOCKS_SMALL_THRESHOLD => ALL_BLOCKS_SMALL_CAP,
            _ => ALL_BLOCKS_DEFAULT_CAP,
        };
        // Every encoded block is appended here; written out only after the header.
        let mut all_blocks: Vec<u8> = Vec::with_capacity(initial_all_blocks_cap);
        let mut total_uncompressed: u64 = 0;
        // Tail of a pre-split block that still has to be emitted by a later iteration.
        let mut pending_input: Vec<u8> = Vec::new();
        let mut reached_eof = false;
        // Running (uncompressed - emitted) byte balance; gates pre-splitting.
        let mut savings = 0i64;
        loop {
            let block_capacity = MAX_BLOCK_SIZE as usize;
            // Continue with the leftover of a split block if there is one, otherwise take a
            // fresh buffer from the matcher.
            let had_pending = !pending_input.is_empty();
            let mut uncompressed_data = if had_pending {
                core::mem::take(&mut pending_input)
            } else {
                self.state.matcher.get_next_space()
            };
            let mut filled = if had_pending {
                uncompressed_data.len()
            } else {
                0
            };
            if uncompressed_data.len() < block_capacity {
                uncompressed_data.resize(block_capacity, 0);
            }
            // Top the buffer up to a full block; a zero-length read marks end of input.
            'read_loop: loop {
                if reached_eof || filled == block_capacity {
                    break 'read_loop;
                }
                let new_bytes = source
                    .read(&mut uncompressed_data[filled..block_capacity])
                    .unwrap();
                if new_bytes == 0 {
                    reached_eof = true;
                    break 'read_loop;
                }
                filled += new_bytes;
                // Pending bytes were already counted when they were first read.
                total_uncompressed += new_bytes as u64;
            }
            uncompressed_data.truncate(filled);
            let mut last_block = reached_eof;
            // Before EOF is seen, assume at least one more full block remains.
            let remaining_for_split = if reached_eof {
                uncompressed_data.len()
            } else {
                block_capacity
            };
            // Only full blocks of compressed levels are considered for pre-splitting.
            if !matches!(self.compression_level, CompressionLevel::Uncompressed)
                && uncompressed_data.len() == block_capacity
            {
                let block_len = donor_optimal_block_size(
                    self.compression_level,
                    &uncompressed_data,
                    remaining_for_split,
                    block_capacity,
                    savings,
                );
                if block_len < uncompressed_data.len() {
                    // Keep the tail for the next iteration, with room to grow to a full block.
                    pending_input = uncompressed_data.split_off(block_len);
                    if pending_input.capacity() < block_capacity {
                        pending_input.reserve_exact(block_capacity - pending_input.len());
                    }
                    // More data follows, so this cannot be the final block.
                    last_block = false;
                }
            }
            #[cfg(feature = "hash")]
            self.hasher.write(&uncompressed_data);
            if uncompressed_data.is_empty() {
                // Nothing left to encode: terminate the frame with an empty raw block.
                let header = BlockHeader {
                    last_block: true,
                    block_type: crate::blocks::block::BlockType::Raw,
                    block_size: 0,
                };
                header.serialize(&mut all_blocks);
                break;
            }

            match self.compression_level {
                CompressionLevel::Uncompressed => {
                    // Stored as-is behind a raw block header.
                    let header = BlockHeader {
                        last_block,
                        block_type: crate::blocks::block::BlockType::Raw,
                        block_size: uncompressed_data.len().try_into().unwrap(),
                    };
                    header.serialize(&mut all_blocks);
                    all_blocks.extend_from_slice(&uncompressed_data);
                    // Each raw block is charged 3 bytes on top of its payload.
                    savings +=
                        uncompressed_data.len() as i64 - (3 + uncompressed_data.len()) as i64;
                }
                CompressionLevel::Fastest
                | CompressionLevel::Default
                | CompressionLevel::Better
                | CompressionLevel::Best
                | CompressionLevel::Level(_) => {
                    let before_len = all_blocks.len();
                    let block_len = uncompressed_data.len();
                    compress_block_encoded(
                        &mut self.state,
                        self.compression_level,
                        last_block,
                        uncompressed_data,
                        &mut all_blocks,
                    );
                    // Credit the bytes this block saved relative to its input size.
                    savings += block_len as i64 - (all_blocks.len() - before_len) as i64;
                }
            }
            if last_block && pending_input.is_empty() {
                break;
            }
        }

        // Single-segment header: only without a dictionary, with a known size hint, and for
        // inputs of at least 512 bytes that fit in the matcher's window.
        let single_segment = !use_dictionary_state
            && source_size_hint_known
            && total_uncompressed >= 512
            && total_uncompressed <= window_size;
        let header = FrameHeader {
            frame_content_size: Some(total_uncompressed),
            single_segment,
            content_checksum: cfg!(feature = "hash"),
            dictionary_id: if use_dictionary_state {
                self.dictionary.as_ref().map(|dict| dict.id as u64)
            } else {
                None
            },
            // The window size is left out of the header when `single_segment` is set.
            window_size: if single_segment {
                None
            } else {
                Some(window_size)
            },
            magicless: self.magicless,
        };
        let mut header_buf: Vec<u8> = Vec::with_capacity(14);
        header.serialize(&mut header_buf);
        drain.write_all(&header_buf).unwrap();
        drain.write_all(&all_blocks).unwrap();

        #[cfg(feature = "hash")]
        {
            // The checksum field is the low 32 bits of the 64-bit hash, little-endian.
            let content_checksum = self.hasher.finish();
            drain
                .write_all(&(content_checksum as u32).to_le_bytes())
                .unwrap();
        }
    }
798
    /// Returns a mutable reference to the current source, if one is set.
    pub fn source_mut(&mut self) -> Option<&mut R> {
        self.uncompressed_data.as_mut()
    }
803
    /// Returns a mutable reference to the current drain, if one is set.
    pub fn drain_mut(&mut self) -> Option<&mut W> {
        self.compressed_data.as_mut()
    }
808
    /// Returns a shared reference to the current source, if one is set.
    pub fn source(&self) -> Option<&R> {
        self.uncompressed_data.as_ref()
    }
813
    /// Returns a shared reference to the current drain, if one is set.
    pub fn drain(&self) -> Option<&W> {
        self.compressed_data.as_ref()
    }
818
819 pub fn take_source(&mut self) -> Option<R> {
821 self.uncompressed_data.take()
822 }
823
824 pub fn take_drain(&mut self) -> Option<W> {
826 self.compressed_data.take()
827 }
828
829 pub fn replace_matcher(&mut self, mut match_generator: M) -> M {
831 core::mem::swap(&mut match_generator, &mut self.state.matcher);
832 match_generator
833 }
834
835 pub fn set_compression_level(
837 &mut self,
838 compression_level: CompressionLevel,
839 ) -> CompressionLevel {
840 let old = self.compression_level;
841 self.compression_level = compression_level;
842 old
843 }
844
    /// Returns the currently configured compression level.
    pub fn compression_level(&self) -> CompressionLevel {
        self.compression_level
    }
849
850 pub fn set_dictionary(
857 &mut self,
858 dictionary: crate::decoding::Dictionary,
859 ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
860 {
861 if dictionary.id == 0 {
862 return Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId);
863 }
864 if let Some(index) = dictionary.offset_hist.iter().position(|&rep| rep == 0) {
865 return Err(
866 crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
867 index: index as u8,
868 },
869 );
870 }
871 self.dictionary_entropy_cache = Some(CachedDictionaryEntropy {
872 huff: dictionary.huf.table.to_encoder_table(),
873 ll_previous: dictionary
874 .fse
875 .literal_lengths
876 .to_encoder_table()
877 .map(|table| PreviousFseTable::Custom(Box::new(table))),
878 ml_previous: dictionary
879 .fse
880 .match_lengths
881 .to_encoder_table()
882 .map(|table| PreviousFseTable::Custom(Box::new(table))),
883 of_previous: dictionary
884 .fse
885 .offsets
886 .to_encoder_table()
887 .map(|table| PreviousFseTable::Custom(Box::new(table))),
888 });
889 Ok(self.dictionary.replace(dictionary))
890 }
891
892 pub fn set_dictionary_from_bytes(
894 &mut self,
895 raw_dictionary: &[u8],
896 ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
897 {
898 let dictionary = crate::decoding::Dictionary::decode_dict(raw_dictionary)?;
899 self.set_dictionary(dictionary)
900 }
901
902 pub fn clear_dictionary(&mut self) -> Option<crate::decoding::Dictionary> {
904 self.dictionary_entropy_cache = None;
905 self.dictionary.take()
906 }
907}
908
909#[cfg(test)]
910mod tests {
911 #[cfg(all(feature = "dict_builder", feature = "std"))]
912 use alloc::format;
913 use alloc::vec;
914
915 use super::FrameCompressor;
916 use crate::blocks::block::BlockType;
917 use crate::common::{MAGIC_NUM, MAX_BLOCK_SIZE};
918 use crate::decoding::{FrameDecoder, block_decoder, frame::read_frame_header};
919 use crate::encoding::{Matcher, Sequence};
920 use alloc::vec::Vec;
921
922 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
923 let mut state = seed;
924 let mut data = Vec::with_capacity(len);
925 for _ in 0..len {
926 state = state
927 .wrapping_mul(6364136223846793005)
928 .wrapping_add(1442695040888963407);
929 data.push((state >> 33) as u8);
930 }
931 data
932 }
933
934 fn first_block_type(frame: &[u8]) -> BlockType {
935 let (_, header_size) = read_frame_header(frame).expect("frame header should parse");
936 let mut decoder = block_decoder::new();
937 let (header, _) = decoder
938 .read_block_header(&frame[header_size as usize..])
939 .expect("block header should parse");
940 header.block_type
941 }
942
943 #[cfg(feature = "std")]
945 #[test]
946 fn fcs_header_written_and_c_zstd_compatible() {
947 let levels = [
948 crate::encoding::CompressionLevel::Uncompressed,
949 crate::encoding::CompressionLevel::Fastest,
950 crate::encoding::CompressionLevel::Default,
951 crate::encoding::CompressionLevel::Better,
952 crate::encoding::CompressionLevel::Best,
953 ];
954 let fcs_2byte = vec![0xCDu8; 300]; let large = vec![0xABu8; 100_000];
956 let inputs: [&[u8]; 5] = [
957 &[],
958 &[0x00],
959 b"abcdefghijklmnopqrstuvwxy\n",
960 &fcs_2byte,
961 &large,
962 ];
963 for level in levels {
964 for data in &inputs {
965 let compressed = crate::encoding::compress_to_vec(*data, level);
966 let header = crate::decoding::frame::read_frame_header(compressed.as_slice())
968 .unwrap()
969 .0;
970 assert_eq!(
971 header.frame_content_size(),
972 data.len() as u64,
973 "FCS mismatch for len={} level={:?}",
974 data.len(),
975 level,
976 );
977 assert_ne!(
980 header.descriptor.frame_content_size_bytes().unwrap(),
981 0,
982 "FCS field must be present for len={} level={:?}",
983 data.len(),
984 level,
985 );
986 let mut decoded = Vec::new();
988 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
989 |e| {
990 panic!(
991 "C zstd decode failed for len={} level={level:?}: {e}",
992 data.len()
993 )
994 },
995 );
996 assert_eq!(
997 decoded.as_slice(),
998 *data,
999 "C zstd roundtrip failed for len={}",
1000 data.len()
1001 );
1002 }
1003 }
1004 }
1005
1006 #[cfg(feature = "std")]
1007 #[test]
1008 fn source_size_hint_fastest_remains_ffi_compatible_small_input() {
1009 let data = vec![0xAB; 2047];
1010 let compressed = {
1011 let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1012 compressor.set_source_size_hint(data.len() as u64);
1013 compressor.set_source(data.as_slice());
1014 let mut out = Vec::new();
1015 compressor.set_drain(&mut out);
1016 compressor.compress();
1017 out
1018 };
1019
1020 let mut decoded = Vec::new();
1021 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1022 assert_eq!(decoded, data);
1023 }
1024
1025 #[cfg(feature = "std")]
1026 #[test]
1027 fn small_hinted_default_frame_uses_single_segment_header() {
1028 let data = generate_data(0xD15E_A5ED, 1024);
1029 let compressed = {
1030 let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
1031 compressor.set_source_size_hint(data.len() as u64);
1032 compressor.set_source(data.as_slice());
1033 let mut out = Vec::new();
1034 compressor.set_drain(&mut out);
1035 compressor.compress();
1036 out
1037 };
1038
1039 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1040 assert!(
1041 frame_header.descriptor.single_segment_flag(),
1042 "small hinted default frames should use single-segment header for Rust/FFI parity"
1043 );
1044 assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1045 let mut decoded = Vec::new();
1046 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded)
1047 .expect("ffi decoder must accept single-segment small hinted default frame");
1048 assert_eq!(decoded, data);
1049 }
1050
1051 #[cfg(feature = "std")]
1052 #[test]
1053 fn small_hinted_numeric_default_levels_use_single_segment_header() {
1054 let data = generate_data(0xA11C_E003, 1024);
1055 for level in [
1056 super::CompressionLevel::Level(0),
1057 super::CompressionLevel::Level(3),
1058 ] {
1059 let compressed = {
1060 let mut compressor = FrameCompressor::new(level);
1061 compressor.set_source_size_hint(data.len() as u64);
1062 compressor.set_source(data.as_slice());
1063 let mut out = Vec::new();
1064 compressor.set_drain(&mut out);
1065 compressor.compress();
1066 out
1067 };
1068
1069 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1070 assert!(
1071 frame_header.descriptor.single_segment_flag(),
1072 "small hinted numeric default level frames should use single-segment header (level={level:?})"
1073 );
1074 assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1075 let mut decoded = Vec::new();
1076 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1077 panic!(
1078 "ffi decoder must accept single-segment small hinted numeric default level frame (level={level:?}): {e}"
1079 )
1080 });
1081 assert_eq!(decoded, data);
1082 }
1083 }
1084
1085 #[cfg(feature = "std")]
1086 #[test]
1087 fn source_size_hint_levels_remain_ffi_compatible_small_inputs_matrix() {
1088 let levels = [
1089 super::CompressionLevel::Fastest,
1090 super::CompressionLevel::Default,
1091 super::CompressionLevel::Better,
1092 super::CompressionLevel::Best,
1093 super::CompressionLevel::Level(-1),
1094 super::CompressionLevel::Level(2),
1095 super::CompressionLevel::Level(3),
1096 super::CompressionLevel::Level(4),
1097 super::CompressionLevel::Level(11),
1098 ];
1099 let sizes = [
1100 511usize, 512, 513, 1023, 1024, 1536, 2047, 2048, 4095, 4096, 8191, 16_384, 16_385,
1101 ];
1102
1103 for (seed_idx, seed) in [11u64, 23, 41].into_iter().enumerate() {
1104 for &size in &sizes {
1105 let data = generate_data(seed + seed_idx as u64, size);
1106 for &level in &levels {
1107 let compressed = {
1108 let mut compressor = FrameCompressor::new(level);
1109 compressor.set_source_size_hint(data.len() as u64);
1110 compressor.set_source(data.as_slice());
1111 let mut out = Vec::new();
1112 compressor.set_drain(&mut out);
1113 compressor.compress();
1114 out
1115 };
1116 if matches!(size, 511 | 512) {
1117 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1118 assert_eq!(
1119 frame_header.descriptor.single_segment_flag(),
1120 size == 512,
1121 "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1122 );
1123 }
1124
1125 let mut decoded = Vec::new();
1126 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1127 |e| {
1128 panic!(
1129 "ffi decode failed with source-size hint: level={level:?} size={size} seed={} err={e}",
1130 seed + seed_idx as u64
1131 )
1132 },
1133 );
1134 assert_eq!(
1135 decoded,
1136 data,
1137 "hinted ffi roundtrip mismatch: level={level:?} size={size} seed={}",
1138 seed + seed_idx as u64
1139 );
1140 }
1141 }
1142 }
1143 }
1144
1145 #[cfg(feature = "std")]
1146 #[test]
1147 fn hinted_levels_use_single_segment_header_symmetrically() {
1148 let levels = [
1149 super::CompressionLevel::Fastest,
1150 super::CompressionLevel::Default,
1151 super::CompressionLevel::Better,
1152 super::CompressionLevel::Best,
1153 super::CompressionLevel::Level(0),
1154 super::CompressionLevel::Level(2),
1155 super::CompressionLevel::Level(3),
1156 super::CompressionLevel::Level(4),
1157 super::CompressionLevel::Level(11),
1158 ];
1159 for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1160 let size = 1024 + seed_idx * 97;
1161 let data = generate_data(seed, size);
1162 for &level in &levels {
1163 let compressed = {
1164 let mut compressor = FrameCompressor::new(level);
1165 compressor.set_source_size_hint(data.len() as u64);
1166 compressor.set_source(data.as_slice());
1167 let mut out = Vec::new();
1168 compressor.set_drain(&mut out);
1169 compressor.compress();
1170 out
1171 };
1172 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1173 assert!(
1174 frame_header.descriptor.single_segment_flag(),
1175 "hinted frame should be single-segment for level={level:?} size={}",
1176 data.len()
1177 );
1178 assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1179 let mut decoded = Vec::new();
1180 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1181 panic!(
1182 "ffi decode failed for hinted single-segment parity: level={level:?} size={} err={e}",
1183 data.len()
1184 )
1185 });
1186 assert_eq!(decoded, data);
1187 }
1188 }
1189 }
1190
1191 #[cfg(feature = "std")]
1192 #[test]
1193 fn hinted_levels_pin_511_512_single_segment_boundary() {
1194 let levels = [
1195 super::CompressionLevel::Fastest,
1196 super::CompressionLevel::Default,
1197 super::CompressionLevel::Better,
1198 super::CompressionLevel::Best,
1199 super::CompressionLevel::Level(0),
1200 super::CompressionLevel::Level(2),
1201 super::CompressionLevel::Level(3),
1202 super::CompressionLevel::Level(4),
1203 super::CompressionLevel::Level(11),
1204 ];
1205 for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1206 for &size in &[511usize, 512] {
1207 let data = generate_data(seed + seed_idx as u64, size);
1208 for &level in &levels {
1209 let compressed = {
1210 let mut compressor = FrameCompressor::new(level);
1211 compressor.set_source_size_hint(data.len() as u64);
1212 compressor.set_source(data.as_slice());
1213 let mut out = Vec::new();
1214 compressor.set_drain(&mut out);
1215 compressor.compress();
1216 out
1217 };
1218 let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1219 assert_eq!(
1220 frame_header.descriptor.single_segment_flag(),
1221 size == 512,
1222 "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1223 );
1224 let mut decoded = Vec::new();
1225 zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1226 |e| {
1227 panic!(
1228 "ffi decode failed at single-segment boundary: level={level:?} size={size} seed={} err={e}",
1229 seed + seed_idx as u64
1230 )
1231 },
1232 );
1233 assert_eq!(decoded, data);
1234 }
1235 }
1236 }
1237 }
1238
/// At `CompressionLevel::Fastest`, 10 KiB of generated data must produce a frame whose
/// first block is `BlockType::Raw`, and the frame must roundtrip through the `zstd` decoder.
///
/// NOTE(review): `generate_data` is defined elsewhere; presumably it yields
/// incompressible pseudo-random bytes for the given seed — confirm.
#[cfg(feature = "std")]
#[test]
fn fastest_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Fastest;
    let input = generate_data(0xC0FF_EE11, 10 * 1024);
    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);

    // The block type is checked before decoding, so a wrong type is reported first.
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1252
/// At `CompressionLevel::Default`, 10 KiB of generated data must produce a frame whose
/// first block is `BlockType::Raw`, and the frame must roundtrip through the `zstd` decoder.
///
/// NOTE(review): `generate_data` is defined elsewhere; presumably it yields
/// incompressible pseudo-random bytes for the given seed — confirm.
#[cfg(feature = "std")]
#[test]
fn default_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Default;
    let input = generate_data(0xD15E_A5ED, 10 * 1024);
    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);

    // The block type is checked before decoding, so a wrong type is reported first.
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1266
/// At `CompressionLevel::Best`, 10 KiB of generated data must produce a frame whose
/// first block is `BlockType::Raw`, and the frame must roundtrip through the `zstd` decoder.
///
/// NOTE(review): `generate_data` is defined elsewhere; presumably it yields
/// incompressible pseudo-random bytes for the given seed — confirm.
#[cfg(feature = "std")]
#[test]
fn best_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Best;
    let input = generate_data(0xB35C_AFE1, 10 * 1024);
    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);

    // The block type is checked before decoding, so a wrong type is reported first.
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1280
/// At `CompressionLevel::Level(2)`, 10 KiB of generated data must produce a frame whose
/// first block is `BlockType::Raw`, and the frame must roundtrip through the `zstd` decoder.
///
/// NOTE(review): `generate_data` is defined elsewhere; presumably it yields
/// incompressible pseudo-random bytes for the given seed — confirm.
#[cfg(feature = "std")]
#[test]
fn level2_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Level(2);
    let input = generate_data(0xA11C_E222, 10 * 1024);
    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);

    // The block type is checked before decoding, so a wrong type is reported first.
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1294
/// At `CompressionLevel::Better`, 10 KiB of generated data must produce a frame whose
/// first block is `BlockType::Raw`, and the frame must roundtrip through the `zstd` decoder.
///
/// NOTE(review): `generate_data` is defined elsewhere; presumably it yields
/// incompressible pseudo-random bytes for the given seed — confirm.
#[cfg(feature = "std")]
#[test]
fn better_random_block_uses_raw_fast_path() {
    let level = super::CompressionLevel::Better;
    let input = generate_data(0xBE77_E111, 10 * 1024);
    let frame = crate::encoding::compress_to_vec(input.as_slice(), level);

    // The block type is checked before decoding, so a wrong type is reported first.
    assert_eq!(first_block_type(&frame), BlockType::Raw);

    let mut roundtrip = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut roundtrip).unwrap();
    assert_eq!(roundtrip, input);
}
1308
/// Repetitive log-like input must not be emitted as a raw first block.
///
/// The fixture is 16 KiB of one log line repeated (the final repetition is truncated to
/// fit). For each tested level the first block must not be `BlockType::Raw`, the frame
/// must be smaller than the input, and it must roundtrip through the `zstd` decoder.
#[cfg(feature = "std")]
#[test]
fn compressible_logs_do_not_fall_back_to_raw_fast_path() {
    const LINE: &[u8] =
        b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";

    // Cycle the line and cut at exactly 16 KiB.
    let data: Vec<u8> = LINE.iter().copied().cycle().take(16 * 1024).collect();

    for level in [
        super::CompressionLevel::Fastest,
        super::CompressionLevel::Default,
        super::CompressionLevel::Level(3),
        super::CompressionLevel::Better,
        super::CompressionLevel::Best,
    ] {
        let compressed = crate::encoding::compress_to_vec(data.as_slice(), level);
        assert_ne!(first_block_type(&compressed), BlockType::Raw);
        assert!(
            compressed.len() < data.len(),
            "compressible input should remain compressible for level={level:?}"
        );

        let mut decoded = Vec::new();
        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
        assert_eq!(decoded, data);
    }
}
1338
/// A 4 KiB compressible input with an exact source-size hint must, at every tested level:
/// set the single-segment flag in the frame header, avoid a raw first block, shrink,
/// and roundtrip through the `zstd` decoder.
#[cfg(feature = "std")]
#[test]
fn hinted_small_compressible_frames_use_single_segment_across_levels() {
    const LINE: &[u8] =
        b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";

    // One log line repeated and cut at exactly 4 KiB.
    let data: Vec<u8> = LINE.iter().copied().cycle().take(4 * 1024).collect();

    let levels = [
        super::CompressionLevel::Fastest,
        super::CompressionLevel::Default,
        super::CompressionLevel::Better,
        super::CompressionLevel::Best,
        super::CompressionLevel::Level(0),
        super::CompressionLevel::Level(3),
        super::CompressionLevel::Level(4),
        super::CompressionLevel::Level(11),
    ];

    for level in levels {
        // Compress with the hint set to the true input length.
        let mut compressed = Vec::new();
        {
            let mut compressor = FrameCompressor::new(level);
            compressor.set_source_size_hint(data.len() as u64);
            compressor.set_source(data.as_slice());
            compressor.set_drain(&mut compressed);
            compressor.compress();
        }

        let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
        assert!(
            frame_header.descriptor.single_segment_flag(),
            "hinted small compressible frame should use single-segment (level={level:?})"
        );
        assert_ne!(
            first_block_type(&compressed),
            BlockType::Raw,
            "compressible hinted frame should stay off raw fast path (level={level:?})"
        );
        assert!(
            compressed.len() < data.len(),
            "compressible hinted frame should still shrink (level={level:?})"
        );

        let mut decoded = Vec::new();
        if let Err(e) = zstd::stream::copy_decode(compressed.as_slice(), &mut decoded) {
            panic!("ffi decode failed (level={level:?}): {e}");
        }
        assert_eq!(decoded, data);
    }
}
1389
/// Minimal test matcher that never finds matches: it hands the last committed space back
/// as one literal run. It implements none of the optional dictionary hooks.
struct NoDictionaryMatcher {
    /// The buffer most recently passed to `commit_space`.
    last_space: Vec<u8>,
    /// Value reported by `window_size()` and used as the length of each new space.
    window_size: u64,
}

impl NoDictionaryMatcher {
    /// Creates a matcher with the given window size and no committed data.
    fn new(window_size: u64) -> Self {
        let last_space = Vec::new();
        Self {
            window_size,
            last_space,
        }
    }
}
1403
1404 impl Matcher for NoDictionaryMatcher {
1405 fn get_next_space(&mut self) -> Vec<u8> {
1406 vec![0; self.window_size as usize]
1407 }
1408
1409 fn get_last_space(&mut self) -> &[u8] {
1410 self.last_space.as_slice()
1411 }
1412
1413 fn commit_space(&mut self, space: Vec<u8>) {
1414 self.last_space = space;
1415 }
1416
1417 fn skip_matching(&mut self) {}
1418
1419 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1420 handle_sequence(Sequence::Literals {
1421 literals: self.last_space.as_slice(),
1422 });
1423 }
1424
1425 fn reset(&mut self, _level: super::CompressionLevel) {
1426 self.last_space.clear();
1427 }
1428
1429 fn window_size(&self) -> u64 {
1430 self.window_size
1431 }
1432 }
1433
/// The emitted frame must begin with `MAGIC_NUM` encoded little-endian.
#[test]
fn frame_starts_with_magic_num() {
    let input: &[u8] = &[1, 2, 3];
    let mut frame: Vec<u8> = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(input);
    compressor.set_drain(&mut frame);
    compressor.compress();

    let magic = MAGIC_NUM.to_le_bytes();
    assert!(frame.starts_with(&magic));
}
1445
/// Smoke test: compressing three bytes in `Uncompressed` mode must complete without
/// panicking. The output is not inspected.
#[test]
fn very_simple_raw_compress() {
    let input: &[u8] = &[1, 2, 3];
    let mut frame: Vec<u8> = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(input);
    compressor.set_drain(&mut frame);
    compressor.compress();
}
1456
/// Roundtrips an input made of long constant-byte runs through `Uncompressed` mode and
/// checks it against both this crate's `FrameDecoder` and the `zstd` decoder.
#[test]
fn very_simple_compress() {
    // (byte value, run length) pairs, appended in order.
    let runs: [(u8, usize); 5] = [
        (0, 1 << 17),
        (1, (1 << 17) - 1),
        (2, (1 << 18) - 1),
        (2, 1 << 17),
        (3, (1 << 17) - 1),
    ];
    let mut mock_data: Vec<u8> = Vec::new();
    for (byte, run_len) in runs {
        mock_data.resize(mock_data.len() + run_len, byte);
    }

    let mut output: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(mock_data.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    // Decode with the crate's own decoder.
    let mut own_decoded = Vec::with_capacity(mock_data.len());
    let mut decoder = FrameDecoder::new();
    decoder
        .decode_all_to_vec(&output, &mut own_decoded)
        .unwrap();
    assert_eq!(mock_data, own_decoded);

    // Decode with the FFI decoder.
    let mut ffi_decoded = Vec::new();
    zstd::stream::copy_decode(output.as_slice(), &mut ffi_decoded).unwrap();
    assert_eq!(mock_data, ffi_decoded);
}
1480
/// Roundtrips 512 KiB of zero bytes through `Uncompressed` mode and the crate's own decoder.
#[test]
fn rle_compress() {
    let zeros = vec![0; 1 << 19];
    let mut frame: Vec<u8> = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(zeros.as_slice());
    compressor.set_drain(&mut frame);
    compressor.compress();

    let mut roundtrip = Vec::with_capacity(zeros.len());
    let mut decoder = FrameDecoder::new();
    decoder.decode_all_to_vec(&frame, &mut roundtrip).unwrap();
    assert_eq!(zeros, roundtrip);
}
1496
/// Roundtrips a five-byte input through `Uncompressed` mode and checks it against both
/// this crate's `FrameDecoder` and the `zstd` decoder.
#[test]
fn aaa_compress() {
    let input = vec![0, 1, 3, 4, 5];
    let mut frame: Vec<u8> = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor.set_source(input.as_slice());
    compressor.set_drain(&mut frame);
    compressor.compress();

    // Own decoder.
    let mut own_decoded = Vec::with_capacity(input.len());
    let mut decoder = FrameDecoder::new();
    decoder.decode_all_to_vec(&frame, &mut own_decoded).unwrap();
    assert_eq!(input, own_decoded);

    // FFI decoder.
    let mut ffi_decoded = Vec::new();
    zstd::stream::copy_decode(frame.as_slice(), &mut ffi_decoded).unwrap();
    assert_eq!(input, ffi_decoded);
}
1516
/// End-to-end dictionary compression check:
///
/// 1. `set_dictionary_from_bytes` returns `None` on first use, and a following
///    `set_dictionary` returns the dictionary that was attached before it.
/// 2. The frame header advertises the dictionary id.
/// 3. Decoding without the dictionary fails with `DictNotProvided`.
/// 4. Decoding with the dictionary roundtrips, in both this crate's decoder and the
///    `zstd` bulk decompressor.
#[test]
fn dictionary_compression_sets_required_dict_id_and_roundtrips() {
    use crate::decoding::Dictionary;

    let dict_raw = include_bytes!("../../dict_tests/dictionary");
    let dict_for_encoder = Dictionary::decode_dict(dict_raw).unwrap();
    let dict_for_decoder = Dictionary::decode_dict(dict_raw).unwrap();

    // Payload: the first 2048 bytes of the dictionary content, eight times over.
    let data = dict_for_decoder.dict_content[..2048].repeat(8);

    let mut with_dict = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);

    let previous = compressor
        .set_dictionary_from_bytes(dict_raw)
        .expect("dictionary bytes should parse");
    assert!(
        previous.is_none(),
        "first dictionary insert should return None"
    );

    // Attaching a second dictionary must hand back the one set from bytes.
    let replaced = compressor
        .set_dictionary(dict_for_encoder)
        .expect("valid dictionary should attach")
        .expect("set_dictionary_from_bytes inserted previous dictionary");
    assert_eq!(replaced.id, dict_for_decoder.id);

    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut with_dict);
    compressor.compress();

    let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
        .expect("encoded stream should have a frame header");
    assert_eq!(frame_header.dictionary_id(), Some(dict_for_decoder.id));

    // Without the dictionary registered, decoding must be refused.
    let mut missing_dict_target = Vec::with_capacity(data.len());
    let mut bare_decoder = FrameDecoder::new();
    let err = bare_decoder
        .decode_all_to_vec(&with_dict, &mut missing_dict_target)
        .unwrap_err();
    assert!(
        matches!(
            &err,
            crate::decoding::errors::FrameDecoderError::DictNotProvided { .. }
        ),
        "dict-compressed stream should require dictionary id, got: {err:?}"
    );

    // With the dictionary registered, decoding must roundtrip.
    let mut primed_decoder = FrameDecoder::new();
    primed_decoder.add_dict(dict_for_decoder).unwrap();
    let mut decoded = Vec::with_capacity(data.len());
    primed_decoder
        .decode_all_to_vec(&with_dict, &mut decoded)
        .unwrap();
    assert_eq!(decoded, data);

    // The FFI decompressor with the same dictionary must agree.
    let mut ffi_decoded = Vec::with_capacity(data.len());
    let mut ffi_decoder = zstd::bulk::Decompressor::with_dictionary(dict_raw).unwrap();
    let ffi_written = ffi_decoder
        .decompress_to_buffer(with_dict.as_slice(), &mut ffi_decoded)
        .unwrap();
    assert_eq!(ffi_written, data.len());
    assert_eq!(ffi_decoded, data);
}
1580
/// Trains a raw dictionary with the `dict_builder` feature, then checks that compressing a
/// small, similar payload with it:
///
/// * advertises the chosen dictionary id in the frame header,
/// * roundtrips through `FrameDecoder` once the dictionary is registered, and
/// * yields a smaller frame than compressing the same payload without a dictionary.
#[cfg(all(feature = "dict_builder", feature = "std"))]
#[test]
fn dictionary_compression_roundtrips_with_dict_builder_dictionary() {
    use std::io::Cursor;

    // Training corpus: 256 short key/value lines.
    let mut training = Vec::new();
    for idx in 0..256u32 {
        training.extend_from_slice(
            format!("tenant=demo table=orders key={idx} region=eu\n").as_bytes(),
        );
    }

    let mut raw_dict = Vec::new();
    crate::dictionary::create_raw_dict_from_source(
        Cursor::new(training.as_slice()),
        training.len(),
        &mut raw_dict,
        4096,
    )
    .expect("dict_builder training should succeed");
    assert!(
        !raw_dict.is_empty(),
        "dict_builder produced an empty dictionary"
    );

    let dict_id = 0xD1C7_0008;
    let encoder_dict =
        crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();
    // Last use of `raw_dict`: move it rather than cloning a second time.
    let decoder_dict = crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict).unwrap();

    // Payload that shares vocabulary with the training lines.
    let mut payload = Vec::new();
    for idx in 0..96u32 {
        payload.extend_from_slice(
            format!(
                "tenant=demo table=orders op=put key={idx} value=aaaaabbbbbcccccdddddeeeee\n"
            )
            .as_bytes(),
        );
    }

    // Baseline: same payload and level, no dictionary.
    let mut without_dict = Vec::new();
    let mut baseline = FrameCompressor::new(super::CompressionLevel::Fastest);
    baseline.set_source(payload.as_slice());
    baseline.set_drain(&mut without_dict);
    baseline.compress();

    let mut with_dict = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
    compressor
        .set_dictionary(encoder_dict)
        .expect("valid dict_builder dictionary should attach");
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut with_dict);
    compressor.compress();

    let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
        .expect("encoded stream should have a frame header");
    assert_eq!(frame_header.dictionary_id(), Some(dict_id));

    let mut decoder = FrameDecoder::new();
    decoder.add_dict(decoder_dict).unwrap();
    let mut decoded = Vec::with_capacity(payload.len());
    decoder.decode_all_to_vec(&with_dict, &mut decoded).unwrap();
    assert_eq!(decoded, payload);

    assert!(
        with_dict.len() < without_dict.len(),
        "trained dictionary should improve compression for this small payload"
    );
}
1649
/// After attaching a dictionary from bytes and compressing an empty input, the
/// compressor state must hold a previous Huffman table and previous LL/ML/OF FSE tables.
#[test]
fn set_dictionary_from_bytes_seeds_entropy_tables_for_first_block() {
    let dict_raw = include_bytes!("../../dict_tests/dictionary");
    let input: &[u8] = b"";
    let mut output = Vec::new();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
    let previous = compressor
        .set_dictionary_from_bytes(dict_raw)
        .expect("dictionary bytes should parse");
    assert!(previous.is_none());

    compressor.set_source(input);
    compressor.set_drain(&mut output);
    compressor.compress();

    // Inspect the state left behind by the compression run.
    let state = &compressor.state;
    assert!(
        state.last_huff_table.is_some(),
        "dictionary entropy should seed previous huffman table before first block"
    );
    assert!(
        state.fse_tables.ll_previous.is_some(),
        "dictionary entropy should seed previous ll table before first block"
    );
    assert!(
        state.fse_tables.ml_previous.is_some(),
        "dictionary entropy should seed previous ml table before first block"
    );
    assert!(
        state.fse_tables.of_previous.is_some(),
        "dictionary entropy should seed previous of table before first block"
    );
}
1683
/// `set_dictionary` must reject a dictionary whose id is 0 with `ZeroDictionaryId`.
#[test]
fn set_dictionary_rejects_zero_dictionary_id() {
    use crate::decoding::errors::DictionaryDecodeError;
    use crate::decoding::scratch::{FSEScratch, HuffmanScratch};
    use crate::encoding::match_generator::MatchGeneratorDriver;

    // Valid in every field except the zero id.
    let invalid = crate::decoding::Dictionary {
        id: 0,
        fse: FSEScratch::new(),
        huf: HuffmanScratch::new(),
        dict_content: vec![1, 2, 3],
        offset_hist: [1, 4, 8],
    };

    // No source or drain is attached, so the type parameters are spelled out.
    let mut compressor: FrameCompressor<&[u8], Vec<u8>, MatchGeneratorDriver> =
        FrameCompressor::new(super::CompressionLevel::Fastest);

    let outcome = compressor.set_dictionary(invalid);
    assert!(matches!(
        outcome,
        Err(DictionaryDecodeError::ZeroDictionaryId)
    ));
}
1705
/// `set_dictionary` must reject a dictionary whose first repeat offset is 0, reporting
/// `ZeroRepeatOffsetInDictionary { index: 0 }`.
#[test]
fn set_dictionary_rejects_zero_repeat_offsets() {
    use crate::decoding::errors::DictionaryDecodeError;
    use crate::decoding::scratch::{FSEScratch, HuffmanScratch};
    use crate::encoding::match_generator::MatchGeneratorDriver;

    // Non-zero id; only the first entry of the offset history is zero.
    let invalid = crate::decoding::Dictionary {
        id: 1,
        fse: FSEScratch::new(),
        huf: HuffmanScratch::new(),
        dict_content: vec![1, 2, 3],
        offset_hist: [0, 4, 8],
    };

    // No source or drain is attached, so the type parameters are spelled out.
    let mut compressor: FrameCompressor<&[u8], Vec<u8>, MatchGeneratorDriver> =
        FrameCompressor::new(super::CompressionLevel::Fastest);

    let outcome = compressor.set_dictionary(invalid);
    assert!(matches!(
        outcome,
        Err(DictionaryDecodeError::ZeroRepeatOffsetInDictionary { index: 0 })
    ));
}
1731
/// With a dictionary attached but `CompressionLevel::Uncompressed` selected, the frame
/// header must carry no dictionary id and the frame must decode without any dictionary.
#[test]
fn uncompressed_mode_does_not_require_dictionary() {
    let dict_id = 0xABCD_0001;
    let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"shared-history".to_vec())
        .expect("raw dictionary should be valid");
    let payload = b"plain-bytes-that-should-stay-raw";

    let mut output = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
    compressor
        .set_dictionary(dict)
        .expect("dictionary should attach in uncompressed mode");
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
        .expect("encoded frame should have a header");
    assert_eq!(
        frame_header.dictionary_id(),
        None,
        "raw/uncompressed frames must not advertise dictionary dependency"
    );

    // A decoder with no dictionary registered must still succeed.
    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, payload);
}
1762
/// Compresses a payload longer than the advertised window with a small raw dictionary
/// and checks that (a) the advertised window equals the one from a dictionary-free run
/// with an identically configured matcher, and (b) the frame still roundtrips.
#[test]
fn dictionary_roundtrip_stays_valid_after_output_exceeds_window() {
    use crate::decoding::Dictionary;
    use crate::encoding::match_generator::MatchGeneratorDriver;

    let dict_id = 0xABCD_0002;
    let dict = Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
        .expect("raw dictionary should be valid");
    let dict_for_decoder = Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
        .expect("raw dictionary should be valid");

    let payload = b"abcdefgh".repeat(512 * 1024 / 8 + 64);

    // Baseline run: same matcher configuration, no dictionary.
    let mut no_dict_output = Vec::new();
    let mut no_dict_compressor = FrameCompressor::new_with_matcher(
        MatchGeneratorDriver::new(1024, 1),
        super::CompressionLevel::Fastest,
    );
    no_dict_compressor.set_source(payload.as_slice());
    no_dict_compressor.set_drain(&mut no_dict_output);
    no_dict_compressor.compress();
    let (no_dict_frame_header, _) =
        crate::decoding::frame::read_frame_header(no_dict_output.as_slice())
            .expect("baseline frame should have a header");
    let no_dict_window = no_dict_frame_header
        .window_size()
        .expect("window size should be present");

    // Dictionary run.
    let mut output = Vec::new();
    let mut compressor = FrameCompressor::new_with_matcher(
        MatchGeneratorDriver::new(1024, 1),
        super::CompressionLevel::Fastest,
    );
    compressor
        .set_dictionary(dict)
        .expect("dictionary should attach");
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
        .expect("encoded frame should have a header");
    let advertised_window = frame_header
        .window_size()
        .expect("window size should be present");
    assert_eq!(
        advertised_window, no_dict_window,
        "dictionary priming must not inflate advertised window size"
    );
    // Guard the fixture itself: the payload has to be longer than the window.
    assert!(
        payload.len() > advertised_window as usize,
        "test must cross the advertised window boundary"
    );

    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.add_dict(dict_for_decoder).unwrap();
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, payload);
}
1825
/// With a dictionary attached and a source-size hint of 1 (smaller than the real
/// payload), the advertised window must not exceed the un-hinted one and the hinted
/// frame must still roundtrip.
#[test]
fn source_size_hint_with_dictionary_keeps_roundtrip_and_nonincreasing_window() {
    use crate::decoding::Dictionary;

    // Reads the window size advertised in a frame's header.
    let window_of = |frame: &[u8]| {
        crate::decoding::frame::read_frame_header(frame)
            .expect("encoded frame should have a header")
            .0
            .window_size()
            .expect("window size should be present")
    };

    let dict_id = 0xABCD_0004;
    let dict_content = b"abcd".repeat(1024);
    let dict = Dictionary::from_raw_content(dict_id, dict_content).unwrap();
    let dict_for_decoder = Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
    let payload = b"abcdabcdabcdabcd".repeat(128);

    // Hinted run.
    let mut hinted_output = Vec::new();
    let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
    hinted.set_dictionary(dict).unwrap();
    hinted.set_source_size_hint(1);
    hinted.set_source(payload.as_slice());
    hinted.set_drain(&mut hinted_output);
    hinted.compress();

    // Un-hinted run with an equivalent dictionary.
    let unhinted_dict = Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
    let mut no_hint_output = Vec::new();
    let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
    no_hint.set_dictionary(unhinted_dict).unwrap();
    no_hint.set_source(payload.as_slice());
    no_hint.set_drain(&mut no_hint_output);
    no_hint.compress();

    let hinted_window = window_of(hinted_output.as_slice());
    let no_hint_window = window_of(no_hint_output.as_slice());
    assert!(
        hinted_window <= no_hint_window,
        "source-size hint should not increase advertised window with dictionary priming",
    );

    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.add_dict(dict_for_decoder).unwrap();
    decoder
        .decode_all_to_vec(&hinted_output, &mut decoded)
        .unwrap();
    assert_eq!(decoded, payload);
}
1878
/// With a dictionary attached and a source-size hint equal to the real payload length,
/// the advertised window must not exceed the un-hinted one and the hinted frame must
/// still roundtrip.
#[test]
fn source_size_hint_with_dictionary_keeps_roundtrip_for_larger_payload() {
    use crate::decoding::Dictionary;

    // Reads the window size advertised in a frame's header.
    let window_of = |frame: &[u8]| {
        crate::decoding::frame::read_frame_header(frame)
            .expect("encoded frame should have a header")
            .0
            .window_size()
            .expect("window size should be present")
    };

    let dict_id = 0xABCD_0005;
    let dict_content = b"abcd".repeat(1024);
    let dict = Dictionary::from_raw_content(dict_id, dict_content).unwrap();
    let dict_for_decoder = Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
    let payload = b"abcd".repeat(1024);
    let payload_len = payload.len() as u64;

    // Hinted run: the hint is the exact payload length.
    let mut hinted_output = Vec::new();
    let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
    hinted.set_dictionary(dict).unwrap();
    hinted.set_source_size_hint(payload_len);
    hinted.set_source(payload.as_slice());
    hinted.set_drain(&mut hinted_output);
    hinted.compress();

    // Un-hinted run with an equivalent dictionary.
    let unhinted_dict = Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
    let mut no_hint_output = Vec::new();
    let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
    no_hint.set_dictionary(unhinted_dict).unwrap();
    no_hint.set_source(payload.as_slice());
    no_hint.set_drain(&mut no_hint_output);
    no_hint.compress();

    let hinted_window = window_of(hinted_output.as_slice());
    let no_hint_window = window_of(no_hint_output.as_slice());
    assert!(
        hinted_window <= no_hint_window,
        "source-size hint should not increase advertised window with dictionary priming",
    );

    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.add_dict(dict_for_decoder).unwrap();
    decoder
        .decode_all_to_vec(&hinted_output, &mut decoded)
        .unwrap();
    assert_eq!(decoded, payload);
}
1932
/// When the compressor is driven by `NoDictionaryMatcher`, attaching a dictionary must
/// not put a dictionary id in the frame header, and the frame must decode with a
/// dictionary-free decoder.
#[test]
fn custom_matcher_without_dictionary_priming_does_not_advertise_dict_id() {
    let dict_id = 0xABCD_0003;
    let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
        .expect("raw dictionary should be valid");
    let payload = b"abcdefghabcdefgh";

    let mut output = Vec::new();
    let mut compressor = FrameCompressor::new_with_matcher(
        NoDictionaryMatcher::new(64),
        super::CompressionLevel::Fastest,
    );
    compressor
        .set_dictionary(dict)
        .expect("dictionary should attach");
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
        .expect("encoded frame should have a header");
    assert_eq!(
        frame_header.dictionary_id(),
        None,
        "matchers that do not support dictionary priming must not advertise dictionary dependency"
    );

    // No dictionary is registered on the decoder.
    let mut decoded = Vec::with_capacity(payload.len());
    let mut decoder = FrameDecoder::new();
    decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
    assert_eq!(decoded, payload);
}
1964
/// Compresses the same data twice with one reused compressor and checks that each frame
/// decodes correctly, that each frame's stored checksum matches the decoder's computed
/// one, and that both frames carry the same stored checksum.
#[cfg(feature = "hash")]
#[test]
fn checksum_two_frames_reused_compressor() {
    /// Decodes one frame fully; returns `(bytes, checksum stored in frame, checksum computed)`.
    fn decode_and_collect(compressed: &[u8]) -> (Vec<u8>, Option<u32>, Option<u32>) {
        let mut remaining = compressed;
        let mut decoder = FrameDecoder::new();
        decoder.reset(&mut remaining).unwrap();
        while !decoder.is_finished() {
            decoder
                .decode_blocks(&mut remaining, crate::decoding::BlockDecodingStrategy::All)
                .unwrap();
        }
        let mut bytes = Vec::new();
        decoder.collect_to_writer(&mut bytes).unwrap();
        let stored = decoder.get_checksum_from_data();
        let computed = decoder.get_calculated_checksum();
        (bytes, stored, computed)
    }

    // 1024 bytes cycling through 0..=255.
    let data: Vec<u8> = (0u8..=255).cycle().take(1024).collect();

    let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);

    // First frame.
    let mut compressed1 = Vec::new();
    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut compressed1);
    compressor.compress();

    // Second frame, same compressor instance.
    let mut compressed2 = Vec::new();
    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut compressed2);
    compressor.compress();

    let (decoded1, chksum_from_data1, chksum_calculated1) = decode_and_collect(&compressed1);
    assert_eq!(decoded1, data, "frame 1: decoded data mismatch");
    assert_eq!(
        chksum_from_data1, chksum_calculated1,
        "frame 1: checksum mismatch"
    );

    let (decoded2, chksum_from_data2, chksum_calculated2) = decode_and_collect(&compressed2);
    assert_eq!(decoded2, data, "frame 2: decoded data mismatch");
    assert_eq!(
        chksum_from_data2, chksum_calculated2,
        "frame 2: checksum mismatch"
    );

    assert_eq!(
        chksum_from_data1, chksum_from_data2,
        "frame 1 and frame 2 should have the same checksum (same data, hash must reset per frame)"
    );
}
2028
/// Replays every regular file under `fuzz/artifacts/interop` (if that directory exists)
/// through three interop paths:
///
/// 1. `zstd` encoder -> this crate's `StreamingDecoder` and `FrameDecoder`,
/// 2. this crate's `Uncompressed` encoder -> `zstd` decoder,
/// 3. this crate's `Fastest` encoder -> `zstd` decoder.
///
/// Each path must reproduce the original bytes. The test passes trivially when the
/// directory is missing.
#[cfg(feature = "std")]
#[test]
fn fuzz_targets() {
    use std::io::Read;

    /// Decodes a whole stream with this crate's `StreamingDecoder`.
    fn decode_szstd(data: &mut dyn std::io::Read) -> Vec<u8> {
        let mut decoder = crate::decoding::StreamingDecoder::new(data).unwrap();
        let mut result: Vec<u8> = Vec::new();
        decoder.read_to_end(&mut result).expect("Decoding failed");
        result
    }

    /// Decodes a frame with `FrameDecoder`, asking for up to 1 MiB per `decode_blocks`
    /// call and draining to `result` after each call.
    fn decode_szstd_writer(mut data: impl Read) -> Vec<u8> {
        let mut decoder = crate::decoding::FrameDecoder::new();
        decoder.reset(&mut data).unwrap();
        let mut result = vec![];
        // Keep going until the frame is finished and nothing is left to collect.
        while !decoder.is_finished() || decoder.can_collect() > 0 {
            decoder
                .decode_blocks(
                    &mut data,
                    crate::decoding::BlockDecodingStrategy::UptoBytes(1024 * 1024),
                )
                .unwrap();
            decoder.collect_to_writer(&mut result).unwrap();
        }
        result
    }

    /// Compresses with the `zstd` crate at level 3.
    fn encode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
        zstd::stream::encode_all(std::io::Cursor::new(data), 3)
    }

    /// Reads all of `data` and encodes it with this crate in `Uncompressed` mode.
    fn encode_szstd_uncompressed(data: &mut dyn std::io::Read) -> Vec<u8> {
        let mut input = Vec::new();
        data.read_to_end(&mut input).unwrap();

        crate::encoding::compress_to_vec(
            input.as_slice(),
            crate::encoding::CompressionLevel::Uncompressed,
        )
    }

    /// Reads all of `data` and encodes it with this crate at `Fastest`.
    fn encode_szstd_compressed(data: &mut dyn std::io::Read) -> Vec<u8> {
        let mut input = Vec::new();
        data.read_to_end(&mut input).unwrap();

        crate::encoding::compress_to_vec(
            input.as_slice(),
            crate::encoding::CompressionLevel::Fastest,
        )
    }

    /// Decompresses with the `zstd` crate.
    fn decode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
        let mut output = Vec::new();
        zstd::stream::copy_decode(data, &mut output)?;
        Ok(output)
    }

    // Nothing to replay when the artifact directory is absent (or cannot be probed).
    if !std::fs::exists("fuzz/artifacts/interop").unwrap_or(false) {
        return;
    }

    for file in std::fs::read_dir("fuzz/artifacts/interop").unwrap() {
        let entry = file.unwrap();
        if !entry.file_type().unwrap().is_file() {
            continue;
        }

        let contents = std::fs::read(entry.path()).unwrap();
        let data = contents.as_slice();

        // Path 1: zstd encoder, both of this crate's decoders.
        let compressed = encode_zstd(data).unwrap();
        let decoded = decode_szstd(&mut compressed.as_slice());
        let decoded2 = decode_szstd_writer(&mut compressed.as_slice());
        assert!(
            decoded == data,
            "Decoded data did not match the original input during decompression"
        );
        assert_eq!(
            decoded2, data,
            "Decoded data did not match the original input during decompression"
        );

        // Path 2: this crate's uncompressed encoder, zstd decoder.
        let mut input = data;
        let compressed = encode_szstd_uncompressed(&mut input);
        let decoded = decode_zstd(&compressed).unwrap();
        assert_eq!(
            decoded, data,
            "Decoded data did not match the original input during compression"
        );

        // Path 3: this crate's compressing encoder, zstd decoder.
        let mut input = data;
        let compressed = encode_szstd_compressed(&mut input);
        let decoded = decode_zstd(&compressed).unwrap();
        assert_eq!(
            decoded, data,
            "Decoded data did not match the original input during compression"
        );
    }
}
2124
/// For a block of `MAX_BLOCK_SIZE` identical bytes, `donor_split_block_from_borders`
/// must return the full block length.
#[test]
fn donor_split_block_from_borders_keeps_homogeneous_block() {
    let full_len = MAX_BLOCK_SIZE as usize;
    let block = vec![0xAAu8; full_len];
    assert_eq!(super::donor_split_block_from_borders(&block), full_len);
}
2136
/// For a block whose first half is all zeros and whose second half holds a
/// position-dependent byte pattern, `donor_split_block_from_borders` must return 64 KiB.
#[test]
fn donor_split_block_from_borders_returns_midpoint_for_centred_transition() {
    let half = MAX_BLOCK_SIZE as usize / 2;
    let mut block = vec![0u8; MAX_BLOCK_SIZE as usize];

    // Second half: byte value is (absolute index % 251) + 1.
    for (byte, i) in block[half..].iter_mut().zip(half..) {
        *byte = (i % 251 + 1) as u8;
    }

    let split = super::donor_split_block_from_borders(&block);
    assert_eq!(
        split,
        64 * 1024,
        "centred-transition fixture must take the symmetric \
         midpoint arm (`abs_diff < min_distance`), got {split}"
    );
}
2168
/// Pins the mapping from `CompressionLevel` to `donor_pre_split_level`'s result:
/// `None` for `Fastest`, `Default`, `Better` and `Level(7)`; `Some(0)` for levels 11 and
/// 15; `Some(4)` for levels 16 and 22.
#[test]
fn donor_pre_split_level_dispatches_by_compression_level() {
    use crate::encoding::CompressionLevel;

    // (input level, expected result), checked in this order.
    let expectations = [
        (CompressionLevel::Fastest, None),
        (CompressionLevel::Default, None),
        (CompressionLevel::Better, None),
        (CompressionLevel::Level(7), None),
        (CompressionLevel::Level(11), Some(0)),
        (CompressionLevel::Level(15), Some(0)),
        (CompressionLevel::Level(16), Some(4)),
        (CompressionLevel::Level(22), Some(4)),
    ];

    for (level, expected) in expectations {
        assert_eq!(super::donor_pre_split_level(level), expected);
    }
}
2206
2207 #[test]
2214 fn level_13_borders_split_roundtrips_through_own_decoder() {
2215 use crate::encoding::CompressionLevel;
2216 let mut data = vec![0u8; 256 * 1024];
2217 for (i, byte) in data.iter_mut().enumerate() {
2220 *byte = if i < 128 * 1024 {
2221 (i & 0x07) as u8
2222 } else {
2223 (i % 251 + 1) as u8
2224 };
2225 }
2226
2227 let mut compressed = Vec::new();
2228 let mut compressor = FrameCompressor::new(CompressionLevel::Level(13));
2229 compressor.set_source(data.as_slice());
2230 compressor.set_drain(&mut compressed);
2231 compressor.compress();
2232
2233 let mut decoder = FrameDecoder::new();
2234 let mut source = compressed.as_slice();
2235 decoder
2236 .reset(&mut source)
2237 .expect("frame header should parse");
2238 while !decoder.is_finished() {
2239 decoder
2240 .decode_blocks(&mut source, crate::decoding::BlockDecodingStrategy::All)
2241 .expect("decode should succeed");
2242 }
2243 let mut decoded = Vec::with_capacity(data.len());
2244 decoder.collect_to_writer(&mut decoded).unwrap();
2245 assert_eq!(decoded, data, "roundtrip must reproduce the input verbatim");
2246 }
2247
2248 #[cfg(feature = "std")]
2260 #[test]
2261 fn set_compression_level_then_compress_refreshes_strategy_tag() {
2262 use super::CompressionLevel;
2263 use crate::encoding::strategy::StrategyTag;
2264
2265 let data = vec![0xABu8; 256];
2266 let mut out = Vec::new();
2267 let mut compressor = FrameCompressor::new(CompressionLevel::Fastest);
2268 let initial_tag = compressor.state.strategy_tag;
2269 assert_eq!(
2270 initial_tag,
2271 StrategyTag::for_compression_level(CompressionLevel::Fastest),
2272 "construction-time strategy_tag must reflect initial level",
2273 );
2274
2275 let new_level = CompressionLevel::Level(20);
2279 compressor.set_compression_level(new_level);
2280 compressor.set_source(data.as_slice());
2281 compressor.set_drain(&mut out);
2282 compressor.compress();
2283
2284 let new_tag = compressor.state.strategy_tag;
2285 let expected = StrategyTag::for_compression_level(new_level);
2286 assert_eq!(
2287 new_tag, expected,
2288 "strategy_tag must follow set_compression_level → compress, \
2289 got {new_tag:?} expected {expected:?}",
2290 );
2291 assert_eq!(
2292 expected,
2293 StrategyTag::BtUltra2,
2294 "test fixture invariant: Level(20) must resolve to BtUltra2 \
2295 so the post-switch tag visibly crosses the band boundary",
2296 );
2297 assert_ne!(
2298 new_tag, initial_tag,
2299 "test fixture invariant: chosen levels must resolve to \
2300 different StrategyTag variants",
2301 );
2302 }
2303
2304 #[test]
2308 fn magicless_frame_omits_magic_and_roundtrips() {
2309 use crate::common::MAGIC_NUM;
2310 let input: alloc::vec::Vec<u8> = (0..512u32).map(|i| (i ^ 0xA5) as u8).collect();
2311
2312 let mut output: Vec<u8> = Vec::new();
2314 let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
2315 compressor.set_magicless(true);
2316 compressor.set_source(input.as_slice());
2317 compressor.set_drain(&mut output);
2318 compressor.compress();
2319
2320 assert!(
2322 !output.starts_with(&MAGIC_NUM.to_le_bytes()),
2323 "magicless frame must omit the 4-byte magic prefix",
2324 );
2325
2326 let mut decoder = crate::decoding::FrameDecoder::new();
2328 decoder.set_magicless(true);
2329 let mut cursor: &[u8] = output.as_slice();
2330 decoder.init(&mut cursor).expect("magicless init");
2331 decoder
2332 .decode_blocks(&mut cursor, crate::decoding::BlockDecodingStrategy::All)
2333 .expect("decode_blocks");
2334 let mut decoded: Vec<u8> = Vec::new();
2335 decoder
2336 .collect_to_writer(&mut decoded)
2337 .expect("collect_to_writer");
2338 assert_eq!(decoded, input, "magicless roundtrip must preserve bytes");
2339
2340 use crate::decoding::errors::{FrameDecoderError, ReadFrameHeaderError};
2351 let mut std_decoder = crate::decoding::FrameDecoder::new();
2352 let std_init = std_decoder.init(output.as_slice());
2353 match std_init {
2354 Err(FrameDecoderError::ReadFrameHeaderError(
2355 ReadFrameHeaderError::BadMagicNumber(_) | ReadFrameHeaderError::SkipFrame { .. },
2356 )) => {}
2357 other => panic!(
2358 "standard decoder must reject a magicless frame with \
2359 ReadFrameHeaderError::BadMagicNumber or SkipFrame, got {other:?}",
2360 ),
2361 }
2362 }
2363}