Skip to main content

zrip_encode/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod sequences;
13pub mod strategy;
14#[cfg(feature = "std")]
15pub mod streaming;
16
17#[cfg(feature = "alloc")]
18use alloc::vec::Vec;
19
20use crate::strategy::Strategy;
21use zrip_core::error::CompressError;
22use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
23use zrip_core::xxhash::xxh64;
24
/// Cheap entropy probe deciding whether a block is worth running through the
/// match finder.
///
/// Builds a byte histogram over the first `SAMPLE` bytes of `data` and reports
/// `true` when the histogram is both wide (at least `DISTINCT_THRESHOLD`
/// different byte values occur) and flat (no single value occurs more than
/// `SAMPLE / MAX_FREQ_DENOM` times). Inputs shorter than `SAMPLE` are never
/// flagged.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // Only the leading SAMPLE bytes are inspected; anything shorter is
    // treated as compressible.
    let head = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // A u16 bucket cannot overflow: at most SAMPLE (1024) increments in total.
    let mut histogram = [0u16; 256];
    for &byte in head {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&n| n != 0).count() as u32;
    let peak = histogram.iter().copied().max().unwrap_or(0);
    let peak_limit = SAMPLE as u32 / MAX_FREQ_DENOM;

    distinct >= DISTINCT_THRESHOLD && u32::from(peak) <= peak_limit
}
44
45pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
46    if src_len >= 2 {
47        let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
48        params.hash_log = params.hash_log.min(src_log);
49        params.chain_log = params.chain_log.min(src_log);
50        params.window_log = params.window_log.min(src_log);
51    }
52}
53
54pub fn compress_with_params(
55    input: &[u8],
56    params: &strategy::LevelParams,
57) -> Result<Vec<u8>, CompressError> {
58    let mut params = *params;
59    clamp_params_to_src_size(&mut params, input.len());
60    compress_inner(input, &params)
61}
62
63pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
64    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
65    clamp_params_to_src_size(&mut params, input.len());
66    compress_inner(input, &params)
67}
68
69fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
70    let mut output = Vec::with_capacity(input.len() + 32);
71    compress_frame(input, params, &mut output);
72    Ok(output)
73}
74
75fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
76    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
77
78    let fcs_size = if input.len() <= 255 {
79        1
80    } else if input.len() <= 0xFFFF + 256 {
81        2
82    } else if input.len() <= 0xFFFFFFFF {
83        4
84    } else {
85        8
86    };
87
88    let fcs_flag = match fcs_size {
89        1 => 0,
90        2 => 1,
91        4 => 2,
92        8 => 3,
93        _ => unreachable!(),
94    };
95
96    let descriptor = 0x20 | 0x04 | (fcs_flag << 6);
97    output.push(descriptor);
98
99    match fcs_size {
100        1 => output.push(input.len() as u8),
101        2 => {
102            let v = (input.len() - 256) as u16;
103            output.extend_from_slice(&v.to_le_bytes());
104        }
105        4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
106        8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
107        _ => unreachable!(),
108    }
109
110    if input.is_empty() {
111        block_encoder::encode_raw_block(&[], true, output);
112    } else {
113        let hash_size = 1usize << params.hash_log;
114        let mut rep_offsets = [1u32, 4, 8];
115        let mut offset = 0;
116        let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
117        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
118
119        match params.strategy {
120            Strategy::Fast => {
121                let mut hash_table = vec![0u32; hash_size];
122                while offset < input.len() {
123                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
124                    let block_end = offset + chunk_size;
125                    let is_last = block_end >= input.len();
126
127                    if block_looks_incompressible(&input[offset..block_end]) {
128                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
129                    } else {
130                        fast::compress_fast_block(
131                            input,
132                            offset,
133                            block_end,
134                            params,
135                            &rep_offsets,
136                            &mut hash_table,
137                            &mut sequences,
138                        );
139                        if params.force_raw_literals {
140                            block_encoder::encode_compressed_block_raw(
141                                &input[offset..block_end],
142                                &sequences,
143                                &mut rep_offsets,
144                                is_last,
145                                output,
146                                &mut workspace,
147                            );
148                        } else {
149                            block_encoder::encode_compressed_block(
150                                &input[offset..block_end],
151                                &sequences,
152                                &mut rep_offsets,
153                                is_last,
154                                output,
155                                &mut workspace,
156                            );
157                        }
158                    }
159                    offset = block_end;
160                }
161            }
162            Strategy::DFast => {
163                let short_size = 1usize << params.chain_log;
164                let long_size = 1usize << params.hash_log;
165                let mut hash_short = vec![0u32; short_size];
166                let mut hash_long = vec![0u32; long_size];
167                while offset < input.len() {
168                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
169                    let block_end = offset + chunk_size;
170                    let is_last = block_end >= input.len();
171
172                    if block_looks_incompressible(&input[offset..block_end]) {
173                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
174                    } else {
175                        dfast::compress_dfast_block(
176                            input,
177                            offset,
178                            block_end,
179                            params,
180                            &rep_offsets,
181                            &mut hash_short,
182                            &mut hash_long,
183                            &mut sequences,
184                        );
185                        block_encoder::encode_compressed_block(
186                            &input[offset..block_end],
187                            &sequences,
188                            &mut rep_offsets,
189                            is_last,
190                            output,
191                            &mut workspace,
192                        );
193                    }
194                    offset = block_end;
195                }
196            }
197        }
198    }
199
200    let hash = xxh64(input, 0);
201    let checksum = (hash & 0xFFFFFFFF) as u32;
202    output.extend_from_slice(&checksum.to_le_bytes());
203}
204
205pub fn compress_with_dict(
206    input: &[u8],
207    level: i32,
208    dict: &zrip_core::dict::Dictionary,
209) -> Result<Vec<u8>, CompressError> {
210    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
211    clamp_params_to_src_size(&mut params, input.len());
212
213    let mut output = Vec::with_capacity(input.len() + 32);
214
215    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
216
217    let fcs_size = if input.len() <= 255 {
218        1
219    } else if input.len() <= 0xFFFF + 256 {
220        2
221    } else if input.len() <= 0xFFFFFFFF {
222        4
223    } else {
224        8
225    };
226
227    let fcs_flag = match fcs_size {
228        1 => 0,
229        2 => 1,
230        4 => 2,
231        8 => 3,
232        _ => unreachable!(),
233    };
234
235    let dict_id = dict.id();
236    let dict_id_flag = if dict_id <= 0xFF {
237        1u8
238    } else if dict_id <= 0xFFFF {
239        2
240    } else {
241        3
242    };
243
244    let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
245    output.push(descriptor);
246
247    match dict_id_flag {
248        1 => output.push(dict_id as u8),
249        2 => output.extend_from_slice(&(dict_id as u16).to_le_bytes()),
250        3 => output.extend_from_slice(&dict_id.to_le_bytes()),
251        _ => unreachable!(),
252    }
253
254    match fcs_size {
255        1 => output.push(input.len() as u8),
256        2 => {
257            let v = (input.len() - 256) as u16;
258            output.extend_from_slice(&v.to_le_bytes());
259        }
260        4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
261        8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
262        _ => unreachable!(),
263    }
264
265    if input.is_empty() {
266        block_encoder::encode_raw_block(&[], true, &mut output);
267    } else {
268        let prefix = dict.content();
269        let mut rep_offsets = *dict.rep_offsets();
270        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
271
272        if input.len() <= MAX_BLOCK_SIZE {
273            let sequences = match params.strategy {
274                Strategy::Fast => {
275                    fast::compress_fast_with_prefix(input, &params, &rep_offsets, prefix)
276                }
277                Strategy::DFast => {
278                    dfast::compress_dfast_with_prefix(input, &params, &rep_offsets, prefix)
279                }
280            };
281            if params.force_raw_literals {
282                block_encoder::encode_compressed_block_raw(
283                    input,
284                    &sequences,
285                    &mut rep_offsets,
286                    true,
287                    &mut output,
288                    &mut workspace,
289                );
290            } else {
291                block_encoder::encode_compressed_block(
292                    input,
293                    &sequences,
294                    &mut rep_offsets,
295                    true,
296                    &mut output,
297                    &mut workspace,
298                );
299            }
300        } else {
301            let mut combined = Vec::with_capacity(prefix.len() + input.len());
302            combined.extend_from_slice(prefix);
303            combined.extend_from_slice(input);
304            let plen = prefix.len();
305            let hash_size = 1usize << params.hash_log;
306            let mut sequences = Vec::new();
307
308            match params.strategy {
309                Strategy::Fast => {
310                    let mut hash_table = vec![0u32; hash_size];
311                    fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
312                    let mut offset = 0;
313                    while offset < input.len() {
314                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
315                        let is_last = offset + chunk_size >= input.len();
316                        fast::compress_fast_block(
317                            &combined,
318                            plen + offset,
319                            plen + offset + chunk_size,
320                            &params,
321                            &rep_offsets,
322                            &mut hash_table,
323                            &mut sequences,
324                        );
325                        if params.force_raw_literals {
326                            block_encoder::encode_compressed_block_raw(
327                                &input[offset..offset + chunk_size],
328                                &sequences,
329                                &mut rep_offsets,
330                                is_last,
331                                &mut output,
332                                &mut workspace,
333                            );
334                        } else {
335                            block_encoder::encode_compressed_block(
336                                &input[offset..offset + chunk_size],
337                                &sequences,
338                                &mut rep_offsets,
339                                is_last,
340                                &mut output,
341                                &mut workspace,
342                            );
343                        }
344                        offset += chunk_size;
345                    }
346                }
347                Strategy::DFast => {
348                    let short_size = 1usize << params.chain_log;
349                    let long_size = 1usize << params.hash_log;
350                    let mut hash_short = vec![0u32; short_size];
351                    let mut hash_long = vec![0u32; long_size];
352                    dfast::prefill_hash_tables(
353                        &combined,
354                        plen,
355                        params.hash_log,
356                        params.chain_log,
357                        &mut hash_short,
358                        &mut hash_long,
359                    );
360                    let mut offset = 0;
361                    while offset < input.len() {
362                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
363                        let is_last = offset + chunk_size >= input.len();
364                        dfast::compress_dfast_block(
365                            &combined,
366                            plen + offset,
367                            plen + offset + chunk_size,
368                            &params,
369                            &rep_offsets,
370                            &mut hash_short,
371                            &mut hash_long,
372                            &mut sequences,
373                        );
374                        block_encoder::encode_compressed_block(
375                            &input[offset..offset + chunk_size],
376                            &sequences,
377                            &mut rep_offsets,
378                            is_last,
379                            &mut output,
380                            &mut workspace,
381                        );
382                        offset += chunk_size;
383                    }
384                }
385            }
386        }
387    }
388
389    let hash = xxh64(input, 0);
390    let checksum = (hash & 0xFFFFFFFF) as u32;
391    output.extend_from_slice(&checksum.to_le_bytes());
392
393    Ok(output)
394}
395
396pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
397    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
398    clamp_params_to_src_size(&mut params, input.len());
399    let mut buf = Vec::with_capacity(output.len());
400    compress_frame(input, &params, &mut buf);
401    if buf.len() > output.len() {
402        return Err(CompressError::OutputTooSmall);
403    }
404    output[..buf.len()].copy_from_slice(&buf);
405    Ok(buf.len())
406}