Skip to main content

zrip_encode/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod primitives;
13pub(crate) mod sequences;
14pub mod strategy;
15#[cfg(feature = "std")]
16pub mod streaming;
17
18#[cfg(feature = "alloc")]
19use alloc::vec;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22
23use crate::strategy::Strategy;
24use zrip_core::error::CompressError;
25use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
26use zrip_core::xxhash::xxh64;
27
/// Cheap pre-check used to decide whether a block is worth running through
/// the match finders.
///
/// A byte histogram is built over the first 1024 bytes of `data`. The block
/// is flagged (`true`) only when at least 200 distinct byte values appear
/// *and* no single value appears more than `1024 / 24` (= 42) times.
/// Inputs shorter than the 1024-byte sample are never flagged.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // `get` yields `None` exactly when fewer than `SAMPLE` bytes are available.
    let window = match data.get(..SAMPLE) {
        Some(w) => w,
        None => return false,
    };

    // A `u16` bucket cannot overflow: at most `SAMPLE` (1024) increments land
    // in any single slot.
    let mut histogram = [0u16; 256];
    for &byte in window {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&n| n != 0).count() as u32;
    let peak = histogram.iter().copied().max().unwrap_or(0);
    let peak_limit = SAMPLE as u32 / MAX_FREQ_DENOM;

    distinct >= DISTINCT_THRESHOLD && u32::from(peak) <= peak_limit
}
47
48pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
49    if src_len >= 2 {
50        let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
51        params.hash_log = params.hash_log.min(src_log);
52        params.chain_log = params.chain_log.min(src_log);
53        params.window_log = params.window_log.min(src_log);
54    }
55}
56
57pub fn compress_with_params(
58    input: &[u8],
59    params: &strategy::LevelParams,
60) -> Result<Vec<u8>, CompressError> {
61    let mut params = *params;
62    clamp_params_to_src_size(&mut params, input.len());
63    compress_inner(input, &params)
64}
65
66pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
67    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
68    clamp_params_to_src_size(&mut params, input.len());
69    compress_inner(input, &params)
70}
71
72#[allow(clippy::unnecessary_wraps)]
73fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
74    let mut output = Vec::with_capacity(input.len() + 32);
75    compress_frame(input, params, &mut output);
76    Ok(output)
77}
78
79fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
80    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
81
82    let fcs_size = if input.len() <= 255 {
83        1
84    } else if input.len() <= 0xFFFF + 256 {
85        2
86    } else if input.len() <= 0xFFFF_FFFF {
87        4
88    } else {
89        8
90    };
91
92    let fcs_flag = match fcs_size {
93        1 => 0,
94        2 => 1,
95        4 => 2,
96        8 => 3,
97        _ => unreachable!(),
98    };
99
100    let descriptor = 0x20 | 0x04 | (fcs_flag << 6);
101    output.push(descriptor);
102
103    match fcs_size {
104        1 => output.push(input.len() as u8),
105        2 => {
106            let v = (input.len() - 256) as u16;
107            output.extend_from_slice(&v.to_le_bytes());
108        }
109        4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
110        8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
111        _ => unreachable!(),
112    }
113
114    if input.is_empty() {
115        block_encoder::encode_raw_block(&[], true, output);
116    } else {
117        let hash_size = 1usize << params.hash_log;
118        let mut rep_offsets = [1u32, 4, 8];
119        let mut offset = 0;
120        let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
121        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
122
123        match params.strategy {
124            Strategy::Fast => {
125                let mut hash_table = vec![0u32; hash_size];
126                while offset < input.len() {
127                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
128                    let block_end = offset + chunk_size;
129                    let is_last = block_end >= input.len();
130
131                    if block_looks_incompressible(&input[offset..block_end]) {
132                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
133                    } else {
134                        fast::compress_fast_block(
135                            input,
136                            offset,
137                            block_end,
138                            params,
139                            &rep_offsets,
140                            &mut hash_table,
141                            &mut sequences,
142                        );
143                        if params.force_raw_literals {
144                            block_encoder::encode_compressed_block_raw(
145                                &input[offset..block_end],
146                                &sequences,
147                                &mut rep_offsets,
148                                is_last,
149                                output,
150                                &mut workspace,
151                            );
152                        } else {
153                            block_encoder::encode_compressed_block(
154                                &input[offset..block_end],
155                                &sequences,
156                                &mut rep_offsets,
157                                is_last,
158                                output,
159                                &mut workspace,
160                            );
161                        }
162                    }
163                    offset = block_end;
164                }
165            }
166            Strategy::DFast => {
167                let short_size = 1usize << params.chain_log;
168                let long_size = 1usize << params.hash_log;
169                let mut hash_short = vec![0u32; short_size];
170                let mut hash_long = vec![0u32; long_size];
171                while offset < input.len() {
172                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
173                    let block_end = offset + chunk_size;
174                    let is_last = block_end >= input.len();
175
176                    if block_looks_incompressible(&input[offset..block_end]) {
177                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
178                    } else {
179                        dfast::compress_dfast_block(
180                            input,
181                            offset,
182                            block_end,
183                            params,
184                            &rep_offsets,
185                            &mut hash_short,
186                            &mut hash_long,
187                            &mut sequences,
188                        );
189                        block_encoder::encode_compressed_block(
190                            &input[offset..block_end],
191                            &sequences,
192                            &mut rep_offsets,
193                            is_last,
194                            output,
195                            &mut workspace,
196                        );
197                    }
198                    offset = block_end;
199                }
200            }
201        }
202    }
203
204    let hash = xxh64(input, 0);
205    let checksum = (hash & 0xFFFF_FFFF) as u32;
206    output.extend_from_slice(&checksum.to_le_bytes());
207}
208
209pub fn compress_with_dict(
210    input: &[u8],
211    level: i32,
212    dict: &zrip_core::dict::Dictionary,
213) -> Result<Vec<u8>, CompressError> {
214    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
215    clamp_params_to_src_size(&mut params, input.len());
216
217    let mut output = Vec::with_capacity(input.len() + 32);
218
219    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
220
221    let fcs_size = if input.len() <= 255 {
222        1
223    } else if input.len() <= 0xFFFF + 256 {
224        2
225    } else if input.len() <= 0xFFFF_FFFF {
226        4
227    } else {
228        8
229    };
230
231    let fcs_flag = match fcs_size {
232        1 => 0,
233        2 => 1,
234        4 => 2,
235        8 => 3,
236        _ => unreachable!(),
237    };
238
239    let dict_id = dict.id();
240    let dict_id_flag = if dict_id <= 0xFF {
241        1u8
242    } else if dict_id <= 0xFFFF {
243        2
244    } else {
245        3
246    };
247
248    let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
249    output.push(descriptor);
250
251    match dict_id_flag {
252        1 => output.push(dict_id as u8),
253        2 => output.extend_from_slice(&(dict_id as u16).to_le_bytes()),
254        3 => output.extend_from_slice(&dict_id.to_le_bytes()),
255        _ => unreachable!(),
256    }
257
258    match fcs_size {
259        1 => output.push(input.len() as u8),
260        2 => {
261            let v = (input.len() - 256) as u16;
262            output.extend_from_slice(&v.to_le_bytes());
263        }
264        4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
265        8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
266        _ => unreachable!(),
267    }
268
269    if input.is_empty() {
270        block_encoder::encode_raw_block(&[], true, &mut output);
271    } else {
272        let prefix = dict.content();
273        let mut rep_offsets = *dict.rep_offsets();
274        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
275
276        if input.len() <= MAX_BLOCK_SIZE {
277            let sequences = match params.strategy {
278                Strategy::Fast => {
279                    fast::compress_fast_with_prefix(input, &params, &rep_offsets, prefix)
280                }
281                Strategy::DFast => {
282                    dfast::compress_dfast_with_prefix(input, &params, &rep_offsets, prefix)
283                }
284            };
285            if params.force_raw_literals {
286                block_encoder::encode_compressed_block_raw(
287                    input,
288                    &sequences,
289                    &mut rep_offsets,
290                    true,
291                    &mut output,
292                    &mut workspace,
293                );
294            } else {
295                block_encoder::encode_compressed_block(
296                    input,
297                    &sequences,
298                    &mut rep_offsets,
299                    true,
300                    &mut output,
301                    &mut workspace,
302                );
303            }
304        } else {
305            let mut combined = Vec::with_capacity(prefix.len() + input.len());
306            combined.extend_from_slice(prefix);
307            combined.extend_from_slice(input);
308            let plen = prefix.len();
309            let hash_size = 1usize << params.hash_log;
310            let mut sequences = Vec::new();
311
312            match params.strategy {
313                Strategy::Fast => {
314                    let mut hash_table = vec![0u32; hash_size];
315                    fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
316                    let mut offset = 0;
317                    while offset < input.len() {
318                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
319                        let is_last = offset + chunk_size >= input.len();
320                        fast::compress_fast_block(
321                            &combined,
322                            plen + offset,
323                            plen + offset + chunk_size,
324                            &params,
325                            &rep_offsets,
326                            &mut hash_table,
327                            &mut sequences,
328                        );
329                        if params.force_raw_literals {
330                            block_encoder::encode_compressed_block_raw(
331                                &input[offset..offset + chunk_size],
332                                &sequences,
333                                &mut rep_offsets,
334                                is_last,
335                                &mut output,
336                                &mut workspace,
337                            );
338                        } else {
339                            block_encoder::encode_compressed_block(
340                                &input[offset..offset + chunk_size],
341                                &sequences,
342                                &mut rep_offsets,
343                                is_last,
344                                &mut output,
345                                &mut workspace,
346                            );
347                        }
348                        offset += chunk_size;
349                    }
350                }
351                Strategy::DFast => {
352                    let short_size = 1usize << params.chain_log;
353                    let long_size = 1usize << params.hash_log;
354                    let mut hash_short = vec![0u32; short_size];
355                    let mut hash_long = vec![0u32; long_size];
356                    dfast::prefill_hash_tables(
357                        &combined,
358                        plen,
359                        params.hash_log,
360                        params.chain_log,
361                        &mut hash_short,
362                        &mut hash_long,
363                    );
364                    let mut offset = 0;
365                    while offset < input.len() {
366                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
367                        let is_last = offset + chunk_size >= input.len();
368                        dfast::compress_dfast_block(
369                            &combined,
370                            plen + offset,
371                            plen + offset + chunk_size,
372                            &params,
373                            &rep_offsets,
374                            &mut hash_short,
375                            &mut hash_long,
376                            &mut sequences,
377                        );
378                        block_encoder::encode_compressed_block(
379                            &input[offset..offset + chunk_size],
380                            &sequences,
381                            &mut rep_offsets,
382                            is_last,
383                            &mut output,
384                            &mut workspace,
385                        );
386                        offset += chunk_size;
387                    }
388                }
389            }
390        }
391    }
392
393    let hash = xxh64(input, 0);
394    let checksum = (hash & 0xFFFF_FFFF) as u32;
395    output.extend_from_slice(&checksum.to_le_bytes());
396
397    Ok(output)
398}
399
400pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
401    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
402    clamp_params_to_src_size(&mut params, input.len());
403    let mut buf = Vec::with_capacity(output.len());
404    compress_frame(input, &params, &mut buf);
405    if buf.len() > output.len() {
406        return Err(CompressError::OutputTooSmall);
407    }
408    output[..buf.len()].copy_from_slice(&buf);
409    Ok(buf.len())
410}