Skip to main content

lz4rip_encode/
compress.rs

1//! LZ4 block compression.
2
3use core::fmt;
4
5use crate::hashtable::HashTable;
6use crate::verified_sink::VerifiedSliceSink;
7#[cfg(feature = "alloc")]
8use alloc::vec;
9use lz4rip_core::CompressError;
10use lz4rip_core::END_OFFSET;
11use lz4rip_core::LZ4_MIN_LENGTH;
12use lz4rip_core::MAX_DISTANCE;
13use lz4rip_core::MFLIMIT;
14use lz4rip_core::MINMATCH;
15use lz4rip_core::Sink;
16use lz4rip_core::WINDOW_SIZE;
17
18#[cfg(feature = "alloc")]
19use alloc::vec::Vec;
20
21pub(crate) use crate::hashtable::HashTableU32;
22pub(crate) use crate::hashtable::HashTableU32U16;
23pub use crate::hashtable::{DEFAULT_DICT_ENTRIES, DEFAULT_NODICT_ENTRIES, MIN_ENTRIES};
24
25/// Inputs up to this size reuse the no-dict hash table across calls (epoch-based
26/// table reuse); larger inputs clear it. Independent of table entry count.
27const EPOCH_THRESHOLD: usize = 8 * 1024;
28
29/// Skip acceleration: step grows by 1 every `1 << N` consecutive non-matches.
30/// C lz4 uses 6; see DESIGN.md for tradeoff analysis.
31const INCREASE_STEPSIZE_BITSHIFT: usize = 3;
32
33/// Inputs up to this size use the dict table read-only (no per-call clearing or
34/// table writes). Self-references within small inputs are rare; the dict provides
35/// virtually all matches. Skips the 8 KB table.clear() and all put_at writes.
36const DICT_READONLY_LIMIT: usize = 256;
37
38#[inline]
39fn token_from_literal(lit_len: usize) -> u8 {
40    if lit_len < 0xF {
41        (lit_len as u8) << 4
42    } else {
43        0xF0
44    }
45}
46
47#[inline]
48fn token_from_literal_and_match_length(lit_len: usize, duplicate_length: usize) -> u8 {
49    let mut token = if lit_len < 0xF {
50        (lit_len as u8) << 4
51    } else {
52        0xF0
53    };
54
55    token |= if duplicate_length < 0xF {
56        duplicate_length as u8
57    } else {
58        0xF
59    };
60
61    token
62}
63
64/// Write a variable-length integer in the LZ4 encoding.
65#[inline]
66pub fn write_integer(output: &mut impl Sink, mut n: usize) {
67    while n >= 0xFF {
68        n -= 0xFF;
69        push_byte(output, 0xFF);
70    }
71    push_byte(output, n as u8);
72}
73
74#[cold]
75fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
76    let lit_len = input.len() - start;
77
78    let token = token_from_literal(lit_len);
79    push_byte(output, token);
80    if lit_len >= 0xF {
81        write_integer(output, lit_len - 0xF);
82    }
83    output.extend_from_slice(&input[start..]);
84}
85
86#[inline]
87fn backtrack_match(
88    input: &[u8],
89    cur: &mut usize,
90    literal_start: usize,
91    source: &[u8],
92    candidate: &mut usize,
93) {
94    while *candidate > 0 && *cur > literal_start && input[*cur - 1] == source[*candidate - 1] {
95        *cur -= 1;
96        *candidate -= 1;
97    }
98}
99
100/// Core block compression loop, monomorphized over hash table type and dict mode.
101#[inline(never)]
102pub(crate) fn compress_internal<
103    T: HashTable,
104    const USE_DICT: bool,
105    const HAS_OFFSET: bool,
106    const READONLY: bool,
107    S: Sink,
108>(
109    input: &[u8],
110    input_pos: usize,
111    output: &mut S,
112    table: &mut T,
113    ext_dict: &[u8],
114    input_stream_offset: usize,
115) -> Result<usize, CompressError> {
116    assert!(input_pos <= input.len());
117    if USE_DICT {
118        assert!(ext_dict.len() <= WINDOW_SIZE);
119        assert!(ext_dict.len() <= input_stream_offset);
120        assert!(
121            input_stream_offset
122                .checked_add(input.len())
123                .and_then(|i| i.checked_add(ext_dict.len()))
124                .is_some_and(|i| i <= isize::MAX as usize)
125        );
126    } else {
127        assert!(ext_dict.is_empty());
128    }
129    if !HAS_OFFSET {
130        debug_assert_eq!(input_stream_offset, 0);
131    }
132    let input_stream_offset = if HAS_OFFSET { input_stream_offset } else { 0 };
133    if output.capacity() - output.pos() < get_maximum_output_size(input.len() - input_pos) {
134        return Err(CompressError::OutputTooSmall);
135    }
136
137    let output_start_pos = output.pos();
138    if input.len() - input_pos < LZ4_MIN_LENGTH {
139        handle_last_literals(output, input, input_pos);
140        return Ok(output.pos() - output_start_pos);
141    }
142
143    let ext_dict_stream_offset = input_stream_offset - ext_dict.len();
144    let end_pos_check = input.len() - MFLIMIT;
145    let mut literal_start = input_pos;
146    let mut cur = input_pos;
147
148    if cur == 0 && input_stream_offset == 0 {
149        let hash = T::get_hash_at_inbounds(input, 0);
150        if !READONLY {
151            table.put_at(hash, 0);
152        }
153        cur = 1;
154    }
155
156    let mut forward_hash = T::get_hash_at_inbounds(input, cur);
157
158    loop {
159        let mut candidate;
160        let mut candidate_source;
161        let mut offset;
162        let mut non_match_count = 1 << INCREASE_STEPSIZE_BITSHIFT;
163
164        loop {
165            let step = non_match_count >> INCREASE_STEPSIZE_BITSHIFT;
166            non_match_count += 1;
167            let next_cur = cur + step;
168
169            if next_cur > end_pos_check + 1 {
170                handle_last_literals(output, input, literal_start);
171                return Ok(output.pos() - output_start_pos);
172            }
173
174            let hash = forward_hash;
175            candidate = table.get_at(hash);
176            forward_hash = T::get_hash_at_inbounds(input, next_cur);
177            if !READONLY {
178                table.put_at(hash, cur + input_stream_offset);
179            }
180
181            debug_assert!(READONLY || candidate <= input_stream_offset + cur);
182
183            if candidate >= input_stream_offset
184                && input_stream_offset + cur - candidate <= MAX_DISTANCE
185            {
186                offset = (input_stream_offset + cur - candidate) as u16;
187                candidate -= input_stream_offset;
188                candidate_source = input;
189            } else if USE_DICT
190                && candidate >= ext_dict_stream_offset
191                && input_stream_offset + cur - candidate <= MAX_DISTANCE
192            {
193                offset = (input_stream_offset + cur - candidate) as u16;
194                candidate -= ext_dict_stream_offset;
195                candidate_source = ext_dict;
196            } else {
197                cur = next_cur;
198                continue;
199            }
200            let cand_bytes: u32 = crate::hashtable::get_batch_inbounds(candidate_source, candidate);
201            let curr_bytes: u32 = crate::hashtable::get_batch_inbounds(input, cur);
202
203            if cand_bytes == curr_bytes {
204                break;
205            }
206            cur = next_cur;
207        }
208
209        loop {
210            backtrack_match(
211                input,
212                &mut cur,
213                literal_start,
214                candidate_source,
215                &mut candidate,
216            );
217
218            let lit_len = cur - literal_start;
219
220            cur += MINMATCH;
221            candidate += MINMATCH;
222            let duplicate_length = crate::hashtable::count_same_bytes_inbounds(
223                input,
224                &mut cur,
225                candidate_source,
226                candidate,
227                END_OFFSET,
228            );
229
230            let hash = T::get_hash_at_inbounds(input, cur - 2);
231            if !READONLY {
232                table.put_at(hash, cur - 2 + input_stream_offset);
233            }
234
235            let token = token_from_literal_and_match_length(lit_len, duplicate_length);
236            push_byte(output, token);
237            if lit_len >= 0xF {
238                write_integer(output, lit_len - 0xF);
239            }
240            if lit_len > 0 {
241                copy_literals_wild(output, input, literal_start, lit_len);
242            }
243            push_u16(output, offset);
244            if duplicate_length >= 0xF {
245                write_integer(output, duplicate_length - 0xF);
246            }
247            literal_start = cur;
248
249            if !USE_DICT && cur <= end_pos_check {
250                let hash = T::get_hash_at_inbounds(input, cur);
251                let rematch = table.get_at(hash);
252
253                if input_stream_offset + cur - rematch <= MAX_DISTANCE
254                    && rematch >= input_stream_offset
255                {
256                    let rc = rematch - input_stream_offset;
257                    if crate::hashtable::get_batch_inbounds(input, cur)
258                        == crate::hashtable::get_batch_inbounds(input, rc)
259                    {
260                        table.put_at(hash, cur + input_stream_offset);
261                        candidate = rc;
262                        candidate_source = input;
263                        offset = (input_stream_offset + cur - rematch) as u16;
264                        continue;
265                    }
266                }
267                forward_hash = hash;
268            } else if cur <= end_pos_check {
269                forward_hash = T::get_hash_at_inbounds(input, cur);
270            }
271            break;
272        }
273    }
274}
275
276/// Compress with a caller-owned `HashTableU32`.
277///
278/// This is cross-crate plumbing for the frame encoder. It keeps the internal
279/// `HashTable` trait private, so downstream safe code cannot corrupt match
280/// finder invariants that protect unchecked reads.
281pub fn compress_into_sink_with_table<
282    const USE_DICT: bool,
283    const HAS_OFFSET: bool,
284    const READONLY: bool,
285    S: Sink,
286>(
287    input: &[u8],
288    input_pos: usize,
289    output: &mut S,
290    table: &mut HashTableU32,
291    ext_dict: &[u8],
292    input_stream_offset: usize,
293) -> Result<usize, CompressError> {
294    compress_internal::<_, USE_DICT, HAS_OFFSET, READONLY, _>(
295        input,
296        input_pos,
297        output,
298        table,
299        ext_dict,
300        input_stream_offset,
301    )
302}
303
304/// Seed a caller-owned `HashTableU32` from uncompressed input bytes.
305///
306/// This is cross-crate plumbing for the frame encoder. Dictionary-compressed
307/// linked frames compress the first block with the normal dictionary path, then
308/// seed the reusable linked-block table from that decoded block so later blocks
309/// can reference it.
310pub fn seed_table_with_input(table: &mut HashTableU32, input: &[u8], input_stream_offset: usize) {
311    let mut i = 0usize;
312    while i + core::mem::size_of::<usize>() <= input.len() {
313        let hash = <HashTableU32 as HashTable>::get_hash_at(input, i);
314        table.put_at(hash, i + input_stream_offset);
315        i += 3;
316    }
317}
318
319/// Dual-table compression for `CompressorRef::with_dict`.
320#[inline(never)]
321fn compress_with_dict_table<T: HashTable, S: Sink>(
322    input: &[u8],
323    output: &mut S,
324    table: &mut T,
325    dict_table: &T,
326    ext_dict: &[u8],
327    input_stream_offset: usize,
328) -> Result<usize, CompressError> {
329    debug_assert_eq!(input_stream_offset, ext_dict.len());
330    assert!(ext_dict.len() <= WINDOW_SIZE);
331    assert!(ext_dict.len() <= input_stream_offset);
332    assert!(
333        input_stream_offset
334            .checked_add(input.len())
335            .and_then(|i| i.checked_add(ext_dict.len()))
336            .is_some_and(|i| i <= isize::MAX as usize)
337    );
338    if output.capacity() - output.pos() < get_maximum_output_size(input.len()) {
339        return Err(CompressError::OutputTooSmall);
340    }
341
342    let output_start_pos = output.pos();
343    if input.len() < LZ4_MIN_LENGTH {
344        handle_last_literals(output, input, 0);
345        return Ok(output.pos() - output_start_pos);
346    }
347
348    let end_pos_check = input.len() - MFLIMIT;
349    let mut literal_start = 0;
350
351    let hash = T::get_hash_at_inbounds(input, 0);
352    table.put_at(hash, input_stream_offset);
353    let mut cur = 1;
354
355    let mut forward_hash = T::get_hash_at_inbounds(input, cur);
356
357    loop {
358        let mut candidate;
359        let candidate_source;
360        let offset;
361        let mut non_match_count = 1 << INCREASE_STEPSIZE_BITSHIFT;
362
363        loop {
364            let step = non_match_count >> INCREASE_STEPSIZE_BITSHIFT;
365            non_match_count += 1;
366            let next_cur = cur + step;
367
368            if next_cur > end_pos_check + 1 {
369                handle_last_literals(output, input, literal_start);
370                return Ok(output.pos() - output_start_pos);
371            }
372
373            let hash = forward_hash;
374            forward_hash = T::get_hash_at_inbounds(input, next_cur);
375            let curr_bytes: u32 = crate::hashtable::get_batch_inbounds(input, cur);
376
377            let main_candidate = table.get_at(hash);
378            table.put_at(hash, cur + input_stream_offset);
379
380            // Probe dict table first: for small inputs most matches come from
381            // the dict, and even for large inputs the dict covers the first
382            // window of repeated structure.
383            let dict_candidate = dict_table.get_at(hash);
384            if dict_candidate < input_stream_offset
385                && input_stream_offset + cur - dict_candidate <= MAX_DISTANCE
386            {
387                let cand_bytes: u32 =
388                    crate::hashtable::get_batch_inbounds(ext_dict, dict_candidate);
389                if cand_bytes == curr_bytes {
390                    offset = (input_stream_offset + cur - dict_candidate) as u16;
391                    candidate = dict_candidate;
392                    candidate_source = ext_dict;
393                    break;
394                }
395            }
396
397            if main_candidate >= input_stream_offset
398                && input_stream_offset + cur - main_candidate <= MAX_DISTANCE
399            {
400                let cand_bytes: u32 = crate::hashtable::get_batch_inbounds(
401                    input,
402                    main_candidate - input_stream_offset,
403                );
404                if cand_bytes == curr_bytes {
405                    offset = (input_stream_offset + cur - main_candidate) as u16;
406                    candidate = main_candidate - input_stream_offset;
407                    candidate_source = input;
408                    break;
409                }
410            }
411
412            cur = next_cur;
413        }
414
415        backtrack_match(
416            input,
417            &mut cur,
418            literal_start,
419            candidate_source,
420            &mut candidate,
421        );
422
423        let lit_len = cur - literal_start;
424
425        cur += MINMATCH;
426        candidate += MINMATCH;
427        let duplicate_length = crate::hashtable::count_same_bytes_inbounds(
428            input,
429            &mut cur,
430            candidate_source,
431            candidate,
432            END_OFFSET,
433        );
434
435        let hash = T::get_hash_at_inbounds(input, cur - 2);
436        table.put_at(hash, cur - 2 + input_stream_offset);
437
438        let token = token_from_literal_and_match_length(lit_len, duplicate_length);
439        push_byte(output, token);
440        if lit_len >= 0xF {
441            write_integer(output, lit_len - 0xF);
442        }
443        if lit_len > 0 {
444            copy_literals_wild(output, input, literal_start, lit_len);
445        }
446        push_u16(output, offset);
447        if duplicate_length >= 0xF {
448            write_integer(output, duplicate_length - 0xF);
449        }
450        literal_start = cur;
451
452        if cur <= end_pos_check {
453            forward_hash = T::get_hash_at_inbounds(input, cur);
454        }
455    }
456}
457
458#[inline]
459fn push_byte(output: &mut impl Sink, el: u8) {
460    output.push(el);
461}
462
463#[inline]
464fn push_u16(output: &mut impl Sink, el: u16) {
465    output.extend_from_slice(&el.to_le_bytes());
466}
467
468#[inline(always)]
469fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
470    output.extend_from_slice_wild(&input[input_start..input_start + len], len)
471}
472
473/// Compress `input` into `output` with optional dictionary data.
474pub fn compress_into_sink_with_dict<const USE_DICT: bool>(
475    input: &[u8],
476    output: &mut impl Sink,
477    mut dict_data: &[u8],
478) -> Result<usize, CompressError> {
479    if USE_DICT && dict_data.len() < MINMATCH {
480        return compress_into_sink_with_dict::<false>(input, output, b"");
481    }
482    if dict_data.len() + input.len() < u16::MAX as usize {
483        let mut dict: HashTableU32U16 = HashTableU32U16::new();
484        init_dict(&mut dict, &mut dict_data);
485        compress_internal::<_, USE_DICT, USE_DICT, false, _>(
486            input,
487            0,
488            output,
489            &mut dict,
490            dict_data,
491            dict_data.len(),
492        )
493    } else {
494        let mut dict: HashTableU32 = HashTableU32::new();
495        init_dict(&mut dict, &mut dict_data);
496        compress_internal::<_, USE_DICT, USE_DICT, false, _>(
497            input,
498            0,
499            output,
500            &mut dict,
501            dict_data,
502            dict_data.len(),
503        )
504    }
505}
506
507#[inline]
508pub(crate) fn init_dict<T: HashTable>(dict: &mut T, dict_data: &mut &[u8]) {
509    if dict_data.len() > WINDOW_SIZE {
510        *dict_data = &dict_data[dict_data.len() - WINDOW_SIZE..];
511    }
512    let mut i = 0usize;
513    while i + core::mem::size_of::<usize>() <= dict_data.len() {
514        let hash = T::get_hash_at(dict_data, i);
515        dict.put_at(hash, i);
516        i += 3;
517    }
518}
519
520/// Returns the maximum output size of the compressed data.
521/// Can be used to preallocate capacity on the output vector.
522///
523/// Returns `usize::MAX` if the result would overflow (e.g. on 32-bit with >3.9 GB input).
524#[must_use]
525#[inline]
526pub const fn get_maximum_output_size(input_len: usize) -> usize {
527    let raw = 16u64 + 4 + (input_len as u64 * 110 / 100);
528    if raw > usize::MAX as u64 {
529        usize::MAX
530    } else {
531        raw as usize
532    }
533}
534
535/// Compress all bytes of `input` into `output`.
536/// output should be preallocated with a size of
537/// `get_maximum_output_size`.
538///
539/// Returns the number of bytes written (compressed) into `output`.
540#[inline]
541pub fn compress_into(input: &[u8], output: &mut [u8]) -> Result<usize, CompressError> {
542    compress_into_sink_with_dict::<false>(input, &mut VerifiedSliceSink::new(output, 0), b"")
543}
544
545/// Compress all bytes of `input` into `output` using an external dictionary.
546///
547/// Returns the number of bytes written (compressed) into `output`.
548#[inline]
549pub fn compress_into_with_dict(
550    input: &[u8],
551    output: &mut [u8],
552    dict: &[u8],
553) -> Result<usize, CompressError> {
554    compress_into_sink_with_dict::<true>(input, &mut VerifiedSliceSink::new(output, 0), dict)
555}
556
557/// Compress all bytes of `input`.
558#[must_use]
559#[cfg(feature = "alloc")]
560#[inline]
561pub fn compress(input: &[u8]) -> Vec<u8> {
562    let max_compressed_size = get_maximum_output_size(input.len());
563    let mut compressed: Vec<u8> = vec![0u8; max_compressed_size];
564    let compressed_len = compress_into_sink_with_dict::<false>(
565        input,
566        &mut VerifiedSliceSink::new(&mut compressed, 0),
567        b"",
568    )
569    .unwrap();
570    compressed.truncate(compressed_len);
571
572    compressed
573}
574
575/// A reusable no-dict block compressor with `N` hash-table entries.
576///
577/// [`CompressorRef`] is the standard-sized alias (8 KB table). Use this generic
578/// form to pick a smaller table for memory-constrained (e.g. embedded) targets,
579/// e.g. `CompressorRefN::<512>::new()` for a 2 KB table. `N` must be a power of
580/// two (checked at compile time).
581///
582/// This is the no-alloc API. With `alloc`, use [`Compressor`](crate::Compressor)
583/// instead. For one-shot compression, use [`compress_into`] instead.
584///
585/// # Example
586/// ```
587/// use lz4rip_encode::{CompressorRef, get_maximum_output_size};
588///
589/// let mut comp = CompressorRef::new();
590/// let input = b"hello world, hello world, hello!";
591/// let mut output = vec![0u8; get_maximum_output_size(input.len())];
592/// let compressed_len = comp.compress_into(input, &mut output).unwrap();
593/// ```
594pub struct CompressorRefN<const N: usize = DEFAULT_NODICT_ENTRIES> {
595    table: HashTableU32<N>,
596    stream_offset: usize,
597}
598
599/// A reusable no-dict block compressor with the standard 8 KB hash table.
600pub type CompressorRef = CompressorRefN<DEFAULT_NODICT_ENTRIES>;
601
602impl<const N: usize> fmt::Debug for CompressorRefN<N> {
603    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
604        f.debug_struct("CompressorRef").finish_non_exhaustive()
605    }
606}
607
608impl<const N: usize> CompressorRefN<N> {
609    /// Create a new compressor without a dictionary.
610    #[must_use]
611    pub fn new() -> Self {
612        CompressorRefN {
613            table: HashTableU32::<N>::new(),
614            stream_offset: 0,
615        }
616    }
617
618    /// Compress `input` into `output`, returning the number of compressed bytes.
619    ///
620    /// `output` must be at least [`get_maximum_output_size`]`(input.len())` bytes.
621    pub fn compress_into(
622        &mut self,
623        input: &[u8],
624        output: &mut [u8],
625    ) -> Result<usize, CompressError> {
626        compress_plain_table(&mut self.table, &mut self.stream_offset, input, output)
627    }
628
629    /// Compress `input` into a new `Vec<u8>`.
630    #[cfg(feature = "alloc")]
631    pub fn compress(&mut self, input: &[u8]) -> Vec<u8> {
632        let max_compressed = get_maximum_output_size(input.len());
633        let mut compressed = vec![0u8; max_compressed];
634        let compressed_len = self.compress_into(input, &mut compressed).unwrap();
635        compressed.truncate(compressed_len);
636
637        compressed
638    }
639}
640
641/// A reusable dict block compressor (borrowing) with `N` entries per table.
642///
643/// [`DictCompressorRef`] is the standard-sized alias (two 8 KB tables). Use this
644/// generic form to pick smaller tables for memory-constrained targets, e.g.
645/// `DictCompressorRefN::<1024>::new(dict)` for two 2 KB tables. `N` must be a
646/// power of two (checked at compile time).
647///
648/// This is the no-alloc dict API. With `alloc`, use
649/// [`DictCompressor`](crate::DictCompressor) instead. Without a dictionary, use
650/// [`CompressorRef`].
651///
652/// # Example
653/// ```
654/// use lz4rip_encode::{DictCompressorRef, get_maximum_output_size};
655///
656/// let dict = b"the quick brown fox";
657/// let mut comp = DictCompressorRef::new(dict);
658/// let input = b"the quick brown fox jumps";
659/// let mut output = vec![0u8; get_maximum_output_size(input.len())];
660/// let compressed_len = comp.compress_into(input, &mut output).unwrap();
661/// ```
662pub struct DictCompressorRefN<'a, const N: usize = DEFAULT_DICT_ENTRIES> {
663    table: HashTableU32U16<N>,
664    pristine: HashTableU32U16<N>,
665    dict: &'a [u8],
666}
667
668/// A reusable dict block compressor (borrowing) with the standard 8 KB tables.
669pub type DictCompressorRef<'a> = DictCompressorRefN<'a, DEFAULT_DICT_ENTRIES>;
670
671impl<const N: usize> fmt::Debug for DictCompressorRefN<'_, N> {
672    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
673        f.debug_struct("DictCompressorRef")
674            .field("dict_len", &self.dict.len())
675            .finish()
676    }
677}
678
679impl<'a, const N: usize> DictCompressorRefN<'a, N> {
680    /// Create a new compressor seeded with an external dictionary.
681    ///
682    /// If `dict` is longer than the LZ4 window it is trimmed to the last
683    /// [`WINDOW_SIZE`](lz4rip_core::WINDOW_SIZE) bytes. A dictionary shorter than
684    /// 4 bytes is ignored (no dict matches); use [`CompressorRef`] for that case.
685    #[must_use]
686    pub fn new(dict: &'a [u8]) -> Self {
687        let trimmed = if dict.len() < MINMATCH {
688            b"".as_slice()
689        } else if dict.len() > WINDOW_SIZE {
690            &dict[dict.len() - WINDOW_SIZE..]
691        } else {
692            dict
693        };
694        let mut pristine = HashTableU32U16::<N>::new();
695        let mut dict_ref = trimmed;
696        init_dict(&mut pristine, &mut dict_ref);
697        DictCompressorRefN {
698            table: HashTableU32U16::<N>::new(),
699            pristine,
700            dict: trimmed,
701        }
702    }
703
704    /// Compress `input` into `output`, returning the number of compressed bytes.
705    ///
706    /// `output` must be at least [`get_maximum_output_size`]`(input.len())` bytes.
707    pub fn compress_into(
708        &mut self,
709        input: &[u8],
710        output: &mut [u8],
711    ) -> Result<usize, CompressError> {
712        compress_dict_tables(
713            &mut self.table,
714            &mut self.pristine,
715            self.dict,
716            input,
717            output,
718        )
719    }
720
721    /// Compress `input` into a new `Vec<u8>`.
722    #[cfg(feature = "alloc")]
723    pub fn compress(&mut self, input: &[u8]) -> Vec<u8> {
724        let max_compressed = get_maximum_output_size(input.len());
725        let mut compressed = vec![0u8; max_compressed];
726        let compressed_len = self.compress_into(input, &mut compressed).unwrap();
727        compressed.truncate(compressed_len);
728
729        compressed
730    }
731}
732
733/// Compress `input` using the dict main + pristine tables. Shared by
734/// [`CompressorRef::compress_into`] and the owning [`Compressor`] so the dict
735/// branch logic lives in one place regardless of how the tables are stored.
736pub(crate) fn compress_dict_tables<const N: usize>(
737    table: &mut HashTableU32U16<N>,
738    pristine: &mut HashTableU32U16<N>,
739    dict: &[u8],
740    input: &[u8],
741    output: &mut [u8],
742) -> Result<usize, CompressError> {
743    if input.len() <= DICT_READONLY_LIMIT && dict.len() + input.len() < u16::MAX as usize {
744        compress_internal::<_, true, true, true, _>(
745            input,
746            0,
747            &mut VerifiedSliceSink::new(output, 0),
748            pristine,
749            dict,
750            dict.len(),
751        )
752    } else if dict.len() + input.len() < u16::MAX as usize {
753        table.clear();
754        compress_with_dict_table(
755            input,
756            &mut VerifiedSliceSink::new(output, 0),
757            table,
758            pristine,
759            dict,
760            dict.len(),
761        )
762    } else {
763        // dict + input >= 64 KB: positions overflow u16, so use a u32 table sized
764        // to this compressor's `N` (honors the const-generic knob instead of
765        // allocating a standard 8 KB table).
766        let mut u32_table = HashTableU32::<N>::new();
767        let mut dict_data = dict;
768        init_dict(&mut u32_table, &mut dict_data);
769        compress_internal::<_, true, true, false, _>(
770            input,
771            0,
772            &mut VerifiedSliceSink::new(output, 0),
773            &mut u32_table,
774            dict_data,
775            dict_data.len(),
776        )
777    }
778}
779
780/// Compress `input` using the plain (no-dict) table with epoch-based reuse.
781/// Shared by [`CompressorRef::compress_into`] and the owning [`Compressor`].
782pub(crate) fn compress_plain_table<const N: usize>(
783    table: &mut HashTableU32<N>,
784    stream_offset: &mut usize,
785    input: &[u8],
786    output: &mut [u8],
787) -> Result<usize, CompressError> {
788    let offset = prepare_plain_table(table, stream_offset, input.len());
789    if offset > 0 {
790        compress_internal::<_, false, true, false, _>(
791            input,
792            0,
793            &mut VerifiedSliceSink::new(output, 0),
794            table,
795            b"",
796            offset,
797        )
798    } else {
799        compress_internal::<_, false, false, false, _>(
800            input,
801            0,
802            &mut VerifiedSliceSink::new(output, 0),
803            table,
804            b"",
805            0,
806        )
807    }
808}
809
810#[inline]
811fn prepare_plain_table<const N: usize>(
812    table: &mut HashTableU32<N>,
813    stream_offset: &mut usize,
814    input_len: usize,
815) -> usize {
816    if input_len > EPOCH_THRESHOLD {
817        table.clear();
818        *stream_offset = input_len + MAX_DISTANCE + 1;
819        return 0;
820    }
821    let offset = *stream_offset;
822    let next = offset
823        .checked_add(input_len)
824        .and_then(|v| v.checked_add(MAX_DISTANCE + 1));
825    if let Some(next) = next.filter(|&n| n <= u32::MAX as usize) {
826        *stream_offset = next;
827    } else {
828        table.clear();
829        *stream_offset = input_len + MAX_DISTANCE + 1;
830    }
831    offset
832}
833
834impl<const N: usize> Default for CompressorRefN<N> {
835    fn default() -> Self {
836        Self::new()
837    }
838}
839
840#[cfg(test)]
841mod tests {
842    use super::*;
843
844    fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
845        const USIZE_SIZE: usize = core::mem::size_of::<usize>();
846        let cur_slice = &input[*cur..input.len() - END_OFFSET];
847        let cand_slice = &source[candidate..];
848
849        let mut num = 0;
850        for (block1, block2) in cur_slice
851            .chunks_exact(USIZE_SIZE)
852            .zip(cand_slice.chunks_exact(USIZE_SIZE))
853        {
854            let input_block = usize::from_ne_bytes(block1.try_into().unwrap());
855            let match_block = usize::from_ne_bytes(block2.try_into().unwrap());
856
857            if input_block == match_block {
858                num += USIZE_SIZE;
859            } else {
860                let diff = input_block ^ match_block;
861                num += (diff.to_le().trailing_zeros() / 8) as usize;
862                *cur += num;
863                return num;
864            }
865        }
866
867        #[cold]
868        fn count_same_bytes_tail(a: &[u8], b: &[u8], offset: usize) -> usize {
869            a.iter()
870                .zip(b)
871                .skip(offset)
872                .take_while(|(a, b)| a == b)
873                .count()
874        }
875        num += count_same_bytes_tail(cur_slice, cand_slice, num);
876
877        *cur += num;
878        num
879    }
880
881    #[test]
882    fn test_count_same_bytes() {
883        let first: &[u8] = &[
884            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
885        ];
886        let second: &[u8] = &[
887            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
888        ];
889        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 16);
890
891        for diff_idx in 8..100 {
892            let first: Vec<u8> = (0u8..255).cycle().take(100 + 12).collect();
893            let mut second = first.clone();
894            second[diff_idx] = 255;
895            for start in 0..=diff_idx {
896                let same_bytes = count_same_bytes(&first, &mut start.clone(), &second, start);
897                assert_eq!(same_bytes, diff_idx - start);
898            }
899        }
900    }
901
902    #[test]
903    fn test_bug() {
904        let input: &[u8] = &[
905            10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
906        ];
907        let mut output = [0u8; get_maximum_output_size(20)];
908        let _ = compress_into(input, &mut output).unwrap();
909    }
910
911    #[test]
912    fn test_conformant_last_block() {
913        let aaas: &[u8] = b"aaaaaaaaaaaaaaa";
914
915        let mut out = [0u8; get_maximum_output_size(15)];
916        let n = compress_into(&aaas[..12], &mut out).unwrap();
917        assert!(n > 12);
918        let n = compress_into(&aaas[..13], &mut out).unwrap();
919        assert!(n <= 13);
920        let n = compress_into(&aaas[..14], &mut out).unwrap();
921        assert!(n <= 14);
922        let n = compress_into(&aaas[..15], &mut out).unwrap();
923        assert!(n <= 15);
924    }
925}