Skip to main content

zrip_encode/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod primitives;
13pub(crate) mod sequences;
14pub mod strategy;
15#[cfg(feature = "std")]
16pub mod streaming;
17
18#[cfg(feature = "alloc")]
19use alloc::vec;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22
23use crate::strategy::Strategy;
24use zrip_core::error::CompressError;
25use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
26use zrip_core::xxhash::xxh64;
27
28pub(crate) fn write_frame_header(output: &mut Vec<u8>, content_size: usize, dict_id: Option<u32>) {
29    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
30
31    let fcs_size = if content_size <= 255 {
32        1
33    } else if content_size <= 0xFFFF + 256 {
34        2
35    } else if content_size <= 0xFFFF_FFFF {
36        4
37    } else {
38        8
39    };
40    let fcs_flag: u8 = match fcs_size {
41        1 => 0,
42        2 => 1,
43        4 => 2,
44        _ => 3,
45    };
46
47    let dict_id_flag: u8 = match dict_id {
48        None => 0,
49        Some(id) if id <= 0xFF => 1,
50        Some(id) if id <= 0xFFFF => 2,
51        Some(_) => 3,
52    };
53
54    let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
55    output.push(descriptor);
56
57    match dict_id {
58        Some(id) if id <= 0xFF => output.push(id as u8),
59        Some(id) if id <= 0xFFFF => output.extend_from_slice(&(id as u16).to_le_bytes()),
60        Some(id) => output.extend_from_slice(&id.to_le_bytes()),
61        None => {}
62    }
63
64    match fcs_size {
65        1 => output.push(content_size as u8),
66        2 => {
67            let v = (content_size - 256) as u16;
68            output.extend_from_slice(&v.to_le_bytes());
69        }
70        4 => output.extend_from_slice(&(content_size as u32).to_le_bytes()),
71        _ => output.extend_from_slice(&(content_size as u64).to_le_bytes()),
72    }
73}
74
/// Cheap heuristic that guesses whether `data` is not worth compressing.
///
/// Only the first `SAMPLE` (1024) bytes are inspected. The block is reported
/// as incompressible when that sample contains at least `DISTINCT_THRESHOLD`
/// different byte values and no single value occurs more than
/// `SAMPLE / MAX_FREQ_DENOM` times. Inputs shorter than the sample always
/// return `false`.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    let sample = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // Byte histogram of the sample; counts cannot exceed SAMPLE, so u16 is enough.
    let mut histogram = [0u16; 256];
    for &byte in sample {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&n| n != 0).count() as u32;
    let peak = histogram.iter().copied().max().unwrap_or(0);
    let peak_limit = SAMPLE as u32 / MAX_FREQ_DENOM;

    distinct >= DISTINCT_THRESHOLD && u32::from(peak) <= peak_limit
}
94
95pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
96    if src_len >= 2 {
97        let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
98        params.hash_log = params.hash_log.min(src_log);
99        params.chain_log = params.chain_log.min(src_log);
100        params.window_log = params.window_log.min(src_log);
101    }
102}
103
104pub fn compress_with_params(
105    input: &[u8],
106    params: &strategy::LevelParams,
107) -> Result<Vec<u8>, CompressError> {
108    let mut params = *params;
109    clamp_params_to_src_size(&mut params, input.len());
110    compress_inner(input, &params)
111}
112
113pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
114    let params = strategy::level_params_for_size(level, input.len())
115        .ok_or(CompressError::InvalidLevel(level))?;
116    compress_inner(input, &params)
117}
118
119#[allow(clippy::unnecessary_wraps)]
120fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
121    let mut output = Vec::with_capacity(input.len() + 32);
122    compress_frame(input, params, &mut output);
123    Ok(output)
124}
125
126fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
127    write_frame_header(output, input.len(), None);
128
129    if input.is_empty() {
130        block_encoder::encode_raw_block(&[], true, output);
131    } else {
132        let hash_size = 1usize << params.hash_log;
133        let mut rep_offsets = [1u32, 4, 8];
134        let mut offset = 0;
135        let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
136        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
137
138        match params.strategy {
139            Strategy::Fast => {
140                let mut hash_table = vec![0u32; hash_size];
141                while offset < input.len() {
142                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
143                    let block_end = offset + chunk_size;
144                    let is_last = block_end >= input.len();
145
146                    if block_looks_incompressible(&input[offset..block_end]) {
147                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
148                    } else {
149                        fast::compress_fast_block(
150                            input,
151                            offset,
152                            block_end,
153                            params,
154                            &rep_offsets,
155                            &mut hash_table,
156                            &mut sequences,
157                        );
158                        if params.force_raw_literals {
159                            block_encoder::encode_compressed_block_raw(
160                                &input[offset..block_end],
161                                &sequences,
162                                &mut rep_offsets,
163                                is_last,
164                                output,
165                                &mut workspace,
166                            );
167                        } else {
168                            block_encoder::encode_compressed_block(
169                                &input[offset..block_end],
170                                &sequences,
171                                &mut rep_offsets,
172                                is_last,
173                                output,
174                                &mut workspace,
175                            );
176                        }
177                    }
178                    offset = block_end;
179                }
180            }
181            Strategy::DFast => {
182                let short_size = 1usize << params.chain_log;
183                let long_size = 1usize << params.hash_log;
184                let mut hash_short = vec![0u32; short_size];
185                let mut hash_long = vec![0u32; long_size];
186                while offset < input.len() {
187                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
188                    let block_end = offset + chunk_size;
189                    let is_last = block_end >= input.len();
190
191                    if block_looks_incompressible(&input[offset..block_end]) {
192                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
193                    } else {
194                        dfast::compress_dfast_block(
195                            input,
196                            offset,
197                            block_end,
198                            params,
199                            &rep_offsets,
200                            &mut hash_short,
201                            &mut hash_long,
202                            &mut sequences,
203                        );
204                        block_encoder::encode_compressed_block(
205                            &input[offset..block_end],
206                            &sequences,
207                            &mut rep_offsets,
208                            is_last,
209                            output,
210                            &mut workspace,
211                        );
212                    }
213                    offset = block_end;
214                }
215            }
216        }
217    }
218
219    let hash = xxh64(input, 0);
220    let checksum = (hash & 0xFFFF_FFFF) as u32;
221    output.extend_from_slice(&checksum.to_le_bytes());
222}
223
224pub fn compress_with_dict(
225    input: &[u8],
226    level: i32,
227    dict: &zrip_core::dict::Dictionary,
228) -> Result<Vec<u8>, CompressError> {
229    let total_window = dict.content().len() + input.len();
230    let params = strategy::level_params_for_size(level, total_window)
231        .ok_or(CompressError::InvalidLevel(level))?;
232
233    let mut output = Vec::with_capacity(input.len() + 32);
234    write_frame_header(&mut output, input.len(), Some(dict.id()));
235
236    if input.is_empty() {
237        block_encoder::encode_raw_block(&[], true, &mut output);
238    } else {
239        let prefix = dict.content();
240        let mut rep_offsets = *dict.rep_offsets();
241        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
242
243        workspace.prev_ll = dict
244            .ll_table()
245            .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 35));
246        workspace.prev_of = dict
247            .of_table()
248            .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 31));
249        workspace.prev_ml = dict
250            .ml_table()
251            .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 52));
252        workspace.prev_huffman = dict.huf_table().and_then(|(dt, tl)| {
253            zrip_core::huffman::encode::HuffmanEncodeTable::from_decode_table(dt, tl)
254        });
255
256        if input.len() <= MAX_BLOCK_SIZE {
257            let sequences = match params.strategy {
258                Strategy::Fast => {
259                    fast::compress_fast_with_prefix(input, &params, &rep_offsets, prefix)
260                }
261                Strategy::DFast => {
262                    dfast::compress_dfast_with_prefix(input, &params, &rep_offsets, prefix)
263                }
264            };
265            if params.force_raw_literals {
266                block_encoder::encode_compressed_block_raw(
267                    input,
268                    &sequences,
269                    &mut rep_offsets,
270                    true,
271                    &mut output,
272                    &mut workspace,
273                );
274            } else {
275                block_encoder::encode_compressed_block(
276                    input,
277                    &sequences,
278                    &mut rep_offsets,
279                    true,
280                    &mut output,
281                    &mut workspace,
282                );
283            }
284        } else {
285            let mut combined = Vec::with_capacity(prefix.len() + input.len());
286            combined.extend_from_slice(prefix);
287            combined.extend_from_slice(input);
288            let plen = prefix.len();
289            let hash_size = 1usize << params.hash_log;
290            let mut sequences = Vec::new();
291
292            match params.strategy {
293                Strategy::Fast => {
294                    let mut hash_table = vec![0u32; hash_size];
295                    fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
296                    let mut offset = 0;
297                    while offset < input.len() {
298                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
299                        let is_last = offset + chunk_size >= input.len();
300                        fast::compress_fast_block(
301                            &combined,
302                            plen + offset,
303                            plen + offset + chunk_size,
304                            &params,
305                            &rep_offsets,
306                            &mut hash_table,
307                            &mut sequences,
308                        );
309                        if params.force_raw_literals {
310                            block_encoder::encode_compressed_block_raw(
311                                &input[offset..offset + chunk_size],
312                                &sequences,
313                                &mut rep_offsets,
314                                is_last,
315                                &mut output,
316                                &mut workspace,
317                            );
318                        } else {
319                            block_encoder::encode_compressed_block(
320                                &input[offset..offset + chunk_size],
321                                &sequences,
322                                &mut rep_offsets,
323                                is_last,
324                                &mut output,
325                                &mut workspace,
326                            );
327                        }
328                        offset += chunk_size;
329                    }
330                }
331                Strategy::DFast => {
332                    let short_size = 1usize << params.chain_log;
333                    let long_size = 1usize << params.hash_log;
334                    let mut hash_short = vec![0u32; short_size];
335                    let mut hash_long = vec![0u32; long_size];
336                    dfast::prefill_hash_tables(
337                        &combined,
338                        plen,
339                        params.hash_log,
340                        params.chain_log,
341                        params.min_match,
342                        &mut hash_short,
343                        &mut hash_long,
344                    );
345                    let mut offset = 0;
346                    while offset < input.len() {
347                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
348                        let is_last = offset + chunk_size >= input.len();
349                        dfast::compress_dfast_block(
350                            &combined,
351                            plen + offset,
352                            plen + offset + chunk_size,
353                            &params,
354                            &rep_offsets,
355                            &mut hash_short,
356                            &mut hash_long,
357                            &mut sequences,
358                        );
359                        block_encoder::encode_compressed_block(
360                            &input[offset..offset + chunk_size],
361                            &sequences,
362                            &mut rep_offsets,
363                            is_last,
364                            &mut output,
365                            &mut workspace,
366                        );
367                        offset += chunk_size;
368                    }
369                }
370            }
371        }
372    }
373
374    let hash = xxh64(input, 0);
375    let checksum = (hash & 0xFFFF_FFFF) as u32;
376    output.extend_from_slice(&checksum.to_le_bytes());
377
378    Ok(output)
379}
380
381pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
382    let params = strategy::level_params_for_size(level, input.len())
383        .ok_or(CompressError::InvalidLevel(level))?;
384    let mut buf = Vec::with_capacity(output.len());
385    compress_frame(input, &params, &mut buf);
386    if buf.len() > output.len() {
387        return Err(CompressError::OutputTooSmall);
388    }
389    output[..buf.len()].copy_from_slice(&buf);
390    Ok(buf.len())
391}