Skip to main content

lzf_rust/raw/
encoder.rs

1// SPDX-License-Identifier: BSD-2-Clause
2// Derived from liblzf encoder logic by Stefan Traby and Marc Lehmann.
3// See LICENSES/BSD-2-Clause-liblzf.txt for the preserved upstream notice.
4use crate::{Error, MAX_LITERAL_LEN, MAX_MATCH_LEN, MAX_OFFSET, Result};
5
6const HASH_LOG: usize = 16;
7const HASH_SIZE: usize = 1 << HASH_LOG;
8const HASH_BEST_SIZE: usize = 1 << HASH_LOG;
9
10/// Encoder mode for raw LZF compression.
11#[derive(Clone, Copy, Debug, PartialEq, Eq)]
12pub enum CompressionMode {
13    /// Fast/liblzf default mode (`lzf_compress`).
14    Normal,
15    /// Best-compression mode (`lzf_compress_best`).
16    Best,
17}
18
19#[inline]
20fn hash3(input: &[u8], index: usize) -> usize {
21    let v = (u32::from(input[index]) << 16)
22        | (u32::from(input[index + 1]) << 8)
23        | u32::from(input[index + 2]);
24    ((v.wrapping_mul(0x1e35_a7bd) >> (32 - HASH_LOG - 8)) as usize) & (HASH_SIZE - 1)
25}
26
27#[inline]
28fn hash_best3(input: &[u8], index: usize) -> usize {
29    ((usize::from(input[index]) << 6)
30        ^ (usize::from(input[index + 1]) << 3)
31        ^ usize::from(input[index + 2]))
32        & (HASH_BEST_SIZE - 1)
33}
34
35#[inline]
36fn emit_literals(
37    input: &[u8],
38    out: &mut [u8],
39    op: &mut usize,
40    start: usize,
41    end: usize,
42) -> Result<()> {
43    let len = end - start;
44    if len == 0 {
45        return Ok(());
46    }
47    if len <= MAX_LITERAL_LEN {
48        let needed = 1 + len;
49        if *op + needed > out.len() {
50            return Err(Error::OutputTooSmall);
51        }
52        out[*op] = (len - 1) as u8;
53        *op += 1;
54        out[*op..*op + len].copy_from_slice(&input[start..end]);
55        *op += len;
56        return Ok(());
57    }
58
59    let mut cursor = start;
60    while cursor < end {
61        let chunk = (end - cursor).min(MAX_LITERAL_LEN);
62        let needed = 1 + chunk;
63        if *op + needed > out.len() {
64            return Err(Error::OutputTooSmall);
65        }
66
67        out[*op] = (chunk - 1) as u8;
68        *op += 1;
69        out[*op..*op + chunk].copy_from_slice(&input[cursor..cursor + chunk]);
70        *op += chunk;
71        cursor += chunk;
72    }
73    Ok(())
74}
75
76#[inline]
77fn emit_backref(out: &mut [u8], op: &mut usize, off: usize, len: usize) -> Result<()> {
78    debug_assert!(off < MAX_OFFSET);
79    debug_assert!((3..=MAX_MATCH_LEN).contains(&len));
80
81    let l = len - 2;
82    let needed = if l < 7 { 2 } else { 3 };
83    if *op + needed > out.len() {
84        return Err(Error::OutputTooSmall);
85    }
86
87    if l < 7 {
88        out[*op] = ((l as u8) << 5) | ((off >> 8) as u8);
89        *op += 1;
90    } else {
91        out[*op] = (7u8 << 5) | ((off >> 8) as u8);
92        out[*op + 1] = (l - 7) as u8;
93        *op += 2;
94    }
95
96    out[*op] = (off & 0xff) as u8;
97    *op += 1;
98    Ok(())
99}
100
101fn compress_normal(input: &[u8], output: &mut [u8]) -> Result<usize> {
102    if input.is_empty() {
103        return Ok(0);
104    }
105
106    let mut table = [0u32; HASH_SIZE];
107    let mut op = 0usize;
108    let mut anchor = 0usize;
109    let mut pos = 0usize;
110
111    while pos + 2 < input.len() {
112        let h = hash3(input, pos);
113        let prev = table[h] as usize;
114        table[h] = (pos + 1) as u32;
115
116        if prev != 0 {
117            let candidate = prev - 1;
118            if candidate < pos {
119                let off = pos - candidate - 1;
120                if off < MAX_OFFSET
121                    && input[candidate] == input[pos]
122                    && input[candidate + 1] == input[pos + 1]
123                    && input[candidate + 2] == input[pos + 2]
124                {
125                    emit_literals(input, output, &mut op, anchor, pos)?;
126
127                    let max_len = (input.len() - pos).min(MAX_MATCH_LEN);
128                    let mut len = 3usize;
129                    while len < max_len && input[candidate + len] == input[pos + len] {
130                        len += 1;
131                    }
132
133                    emit_backref(output, &mut op, off, len)?;
134
135                    let end = pos + len;
136                    let mut scan = pos + 1;
137                    while scan + 2 < end {
138                        let hh = hash3(input, scan);
139                        table[hh] = (scan + 1) as u32;
140                        scan += 1;
141                    }
142
143                    pos = end;
144                    anchor = pos;
145                    continue;
146                }
147            }
148        }
149
150        pos += 1;
151    }
152
153    emit_literals(input, output, &mut op, anchor, input.len())?;
154    Ok(op)
155}
156
157fn compress_best_impl(input: &[u8], output: &mut [u8]) -> Result<usize> {
158    if input.is_empty() {
159        return Ok(0);
160    }
161
162    // liblzf stores pointers; we store index+1 (0 == null).
163    let mut first = [0usize; HASH_BEST_SIZE];
164    let mut prev = [0u16; MAX_OFFSET];
165
166    let in_len = input.len();
167    let mut op = 0usize;
168    let mut anchor = 0usize;
169    let mut pos = 0usize;
170
171    while pos + 2 < in_len {
172        let hash = hash_best3(input, pos);
173        let prev_head = first[hash];
174        let slot = pos & (MAX_OFFSET - 1);
175
176        prev[slot] = if prev_head == 0 {
177            0
178        } else {
179            let p = prev_head - 1;
180            (pos - p).min(usize::from(u16::MAX)) as u16
181        };
182        first[hash] = pos + 1;
183
184        let mut best_len = 0usize;
185        let mut best_pos = 0usize;
186        let max_len = (in_len - pos).min(MAX_MATCH_LEN);
187        let lower_bound = pos.saturating_sub(MAX_OFFSET);
188
189        if prev_head != 0 {
190            let mut p = prev_head - 1;
191            let pos0 = input[pos];
192            let pos1 = input[pos + 1];
193            let pos2 = input[pos + 2];
194
195            while p >= lower_bound {
196                if input[p] == pos0
197                    && input[p + 1] == pos1
198                    && input[p + 2] == pos2
199                    && (best_len == 0 || input[p + best_len] == input[pos + best_len])
200                {
201                    let mut l = 3usize;
202                    while l < max_len && input[p + l] == input[pos + l] {
203                        l += 1;
204                    }
205
206                    if l >= best_len {
207                        best_len = l;
208                        best_pos = p;
209                        if l == max_len {
210                            break;
211                        }
212                    }
213                }
214
215                let diff = usize::from(prev[p & (MAX_OFFSET - 1)]);
216                if diff == 0 || p < diff {
217                    break;
218                }
219                p -= diff;
220            }
221        }
222
223        if best_len >= 3 {
224            emit_literals(input, output, &mut op, anchor, pos)?;
225
226            let off = pos - best_pos - 1;
227            emit_backref(output, &mut op, off, best_len)?;
228
229            let end = pos + best_len;
230            let mut scan = pos + 1;
231            while scan + 2 < end {
232                let h = hash_best3(input, scan);
233                let s = scan & (MAX_OFFSET - 1);
234                let head = first[h];
235
236                prev[s] = if head == 0 {
237                    0
238                } else {
239                    let p = head - 1;
240                    (scan - p).min(usize::from(u16::MAX)) as u16
241                };
242                first[h] = scan + 1;
243                scan += 1;
244            }
245
246            pos = end;
247            anchor = pos;
248        } else {
249            pos += 1;
250        }
251    }
252
253    emit_literals(input, output, &mut op, anchor, input.len())?;
254    Ok(op)
255}
256
257/// Compresses `input` into `output` using raw LZF format.
258///
259/// Uses the default liblzf mode (`lzf_compress`).
260///
261/// Returns `Error::OutputTooSmall` if `output` cannot hold the encoded stream.
262///
263/// For a guaranteed-capacity buffer, use `max_compressed_size(input.len())`.
264pub fn compress(input: &[u8], output: &mut [u8]) -> Result<usize> {
265    compress_with_mode(input, output, CompressionMode::Normal)
266}
267
268/// Compresses `input` into `output` using liblzf best-compression mode.
269///
270/// This mirrors `lzf_compress_best` semantics.
271///
272/// Returns `Error::OutputTooSmall` if `output` cannot hold the encoded stream.
273pub fn compress_best(input: &[u8], output: &mut [u8]) -> Result<usize> {
274    compress_best_impl(input, output)
275}
276
277/// Compresses `input` into `output` using the given encoder mode.
278///
279/// - `CompressionMode::Normal` tracks `liblzf` default compressor behavior.
280/// - `CompressionMode::Best` tracks `liblzf` best-ratio compressor behavior.
281pub fn compress_with_mode(input: &[u8], output: &mut [u8], mode: CompressionMode) -> Result<usize> {
282    match mode {
283        CompressionMode::Normal => compress_normal(input, output),
284        CompressionMode::Best => compress_best_impl(input, output),
285    }
286}