Skip to main content

zrip_encode/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod primitives;
13pub(crate) mod sequences;
14pub mod strategy;
15#[cfg(feature = "std")]
16pub mod streaming;
17
18#[cfg(feature = "alloc")]
19use alloc::vec;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22
23use crate::strategy::Strategy;
24use zrip_core::error::CompressError;
25use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
26use zrip_core::xxhash::xxh64;
27
/// Cheap byte-histogram heuristic for "this block is probably not worth compressing".
///
/// Only the first 1024 bytes of `data` are inspected. The block is reported as
/// incompressible when that sample contains at least 200 distinct byte values
/// and no single value occurs more than `1024 / 24` (= 42) times.
///
/// Returns `false` for any input shorter than the 1024-byte sample.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // `get` yields `None` exactly when fewer than SAMPLE bytes are available.
    let head = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // A u16 per bucket is enough: a bucket can hold at most SAMPLE hits.
    let mut histogram = [0u16; 256];
    head.iter()
        .for_each(|&byte| histogram[usize::from(byte)] += 1);

    let distinct = histogram.iter().filter(|&&hits| hits != 0).count() as u32;
    let peak = histogram.iter().copied().max().unwrap_or(0);
    let peak_limit = SAMPLE as u32 / MAX_FREQ_DENOM;

    u32::from(peak) <= peak_limit && distinct >= DISTINCT_THRESHOLD
}
47
48pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
49    if src_len >= 2 {
50        let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
51        params.hash_log = params.hash_log.min(src_log);
52        params.chain_log = params.chain_log.min(src_log);
53        params.window_log = params.window_log.min(src_log);
54    }
55}
56
57pub fn compress_with_params(
58    input: &[u8],
59    params: &strategy::LevelParams,
60) -> Result<Vec<u8>, CompressError> {
61    let mut params = *params;
62    clamp_params_to_src_size(&mut params, input.len());
63    compress_inner(input, &params)
64}
65
66pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
67    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
68    clamp_params_to_src_size(&mut params, input.len());
69    compress_inner(input, &params)
70}
71
72fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
73    let mut output = Vec::with_capacity(input.len() + 32);
74    compress_frame(input, params, &mut output);
75    Ok(output)
76}
77
78fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
79    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
80
81    let fcs_size = if input.len() <= 255 {
82        1
83    } else if input.len() <= 0xFFFF + 256 {
84        2
85    } else if input.len() <= 0xFFFFFFFF {
86        4
87    } else {
88        8
89    };
90
91    let fcs_flag = match fcs_size {
92        1 => 0,
93        2 => 1,
94        4 => 2,
95        8 => 3,
96        _ => unreachable!(),
97    };
98
99    let descriptor = 0x20 | 0x04 | (fcs_flag << 6);
100    output.push(descriptor);
101
102    match fcs_size {
103        1 => output.push(input.len() as u8),
104        2 => {
105            let v = (input.len() - 256) as u16;
106            output.extend_from_slice(&v.to_le_bytes());
107        }
108        4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
109        8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
110        _ => unreachable!(),
111    }
112
113    if input.is_empty() {
114        block_encoder::encode_raw_block(&[], true, output);
115    } else {
116        let hash_size = 1usize << params.hash_log;
117        let mut rep_offsets = [1u32, 4, 8];
118        let mut offset = 0;
119        let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
120        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
121
122        match params.strategy {
123            Strategy::Fast => {
124                let mut hash_table = vec![0u32; hash_size];
125                while offset < input.len() {
126                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
127                    let block_end = offset + chunk_size;
128                    let is_last = block_end >= input.len();
129
130                    if block_looks_incompressible(&input[offset..block_end]) {
131                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
132                    } else {
133                        fast::compress_fast_block(
134                            input,
135                            offset,
136                            block_end,
137                            params,
138                            &rep_offsets,
139                            &mut hash_table,
140                            &mut sequences,
141                        );
142                        if params.force_raw_literals {
143                            block_encoder::encode_compressed_block_raw(
144                                &input[offset..block_end],
145                                &sequences,
146                                &mut rep_offsets,
147                                is_last,
148                                output,
149                                &mut workspace,
150                            );
151                        } else {
152                            block_encoder::encode_compressed_block(
153                                &input[offset..block_end],
154                                &sequences,
155                                &mut rep_offsets,
156                                is_last,
157                                output,
158                                &mut workspace,
159                            );
160                        }
161                    }
162                    offset = block_end;
163                }
164            }
165            Strategy::DFast => {
166                let short_size = 1usize << params.chain_log;
167                let long_size = 1usize << params.hash_log;
168                let mut hash_short = vec![0u32; short_size];
169                let mut hash_long = vec![0u32; long_size];
170                while offset < input.len() {
171                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
172                    let block_end = offset + chunk_size;
173                    let is_last = block_end >= input.len();
174
175                    if block_looks_incompressible(&input[offset..block_end]) {
176                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
177                    } else {
178                        dfast::compress_dfast_block(
179                            input,
180                            offset,
181                            block_end,
182                            params,
183                            &rep_offsets,
184                            &mut hash_short,
185                            &mut hash_long,
186                            &mut sequences,
187                        );
188                        block_encoder::encode_compressed_block(
189                            &input[offset..block_end],
190                            &sequences,
191                            &mut rep_offsets,
192                            is_last,
193                            output,
194                            &mut workspace,
195                        );
196                    }
197                    offset = block_end;
198                }
199            }
200        }
201    }
202
203    let hash = xxh64(input, 0);
204    let checksum = (hash & 0xFFFFFFFF) as u32;
205    output.extend_from_slice(&checksum.to_le_bytes());
206}
207
208pub fn compress_with_dict(
209    input: &[u8],
210    level: i32,
211    dict: &zrip_core::dict::Dictionary,
212) -> Result<Vec<u8>, CompressError> {
213    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
214    clamp_params_to_src_size(&mut params, input.len());
215
216    let mut output = Vec::with_capacity(input.len() + 32);
217
218    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
219
220    let fcs_size = if input.len() <= 255 {
221        1
222    } else if input.len() <= 0xFFFF + 256 {
223        2
224    } else if input.len() <= 0xFFFFFFFF {
225        4
226    } else {
227        8
228    };
229
230    let fcs_flag = match fcs_size {
231        1 => 0,
232        2 => 1,
233        4 => 2,
234        8 => 3,
235        _ => unreachable!(),
236    };
237
238    let dict_id = dict.id();
239    let dict_id_flag = if dict_id <= 0xFF {
240        1u8
241    } else if dict_id <= 0xFFFF {
242        2
243    } else {
244        3
245    };
246
247    let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
248    output.push(descriptor);
249
250    match dict_id_flag {
251        1 => output.push(dict_id as u8),
252        2 => output.extend_from_slice(&(dict_id as u16).to_le_bytes()),
253        3 => output.extend_from_slice(&dict_id.to_le_bytes()),
254        _ => unreachable!(),
255    }
256
257    match fcs_size {
258        1 => output.push(input.len() as u8),
259        2 => {
260            let v = (input.len() - 256) as u16;
261            output.extend_from_slice(&v.to_le_bytes());
262        }
263        4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
264        8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
265        _ => unreachable!(),
266    }
267
268    if input.is_empty() {
269        block_encoder::encode_raw_block(&[], true, &mut output);
270    } else {
271        let prefix = dict.content();
272        let mut rep_offsets = *dict.rep_offsets();
273        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
274
275        if input.len() <= MAX_BLOCK_SIZE {
276            let sequences = match params.strategy {
277                Strategy::Fast => {
278                    fast::compress_fast_with_prefix(input, &params, &rep_offsets, prefix)
279                }
280                Strategy::DFast => {
281                    dfast::compress_dfast_with_prefix(input, &params, &rep_offsets, prefix)
282                }
283            };
284            if params.force_raw_literals {
285                block_encoder::encode_compressed_block_raw(
286                    input,
287                    &sequences,
288                    &mut rep_offsets,
289                    true,
290                    &mut output,
291                    &mut workspace,
292                );
293            } else {
294                block_encoder::encode_compressed_block(
295                    input,
296                    &sequences,
297                    &mut rep_offsets,
298                    true,
299                    &mut output,
300                    &mut workspace,
301                );
302            }
303        } else {
304            let mut combined = Vec::with_capacity(prefix.len() + input.len());
305            combined.extend_from_slice(prefix);
306            combined.extend_from_slice(input);
307            let plen = prefix.len();
308            let hash_size = 1usize << params.hash_log;
309            let mut sequences = Vec::new();
310
311            match params.strategy {
312                Strategy::Fast => {
313                    let mut hash_table = vec![0u32; hash_size];
314                    fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
315                    let mut offset = 0;
316                    while offset < input.len() {
317                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
318                        let is_last = offset + chunk_size >= input.len();
319                        fast::compress_fast_block(
320                            &combined,
321                            plen + offset,
322                            plen + offset + chunk_size,
323                            &params,
324                            &rep_offsets,
325                            &mut hash_table,
326                            &mut sequences,
327                        );
328                        if params.force_raw_literals {
329                            block_encoder::encode_compressed_block_raw(
330                                &input[offset..offset + chunk_size],
331                                &sequences,
332                                &mut rep_offsets,
333                                is_last,
334                                &mut output,
335                                &mut workspace,
336                            );
337                        } else {
338                            block_encoder::encode_compressed_block(
339                                &input[offset..offset + chunk_size],
340                                &sequences,
341                                &mut rep_offsets,
342                                is_last,
343                                &mut output,
344                                &mut workspace,
345                            );
346                        }
347                        offset += chunk_size;
348                    }
349                }
350                Strategy::DFast => {
351                    let short_size = 1usize << params.chain_log;
352                    let long_size = 1usize << params.hash_log;
353                    let mut hash_short = vec![0u32; short_size];
354                    let mut hash_long = vec![0u32; long_size];
355                    dfast::prefill_hash_tables(
356                        &combined,
357                        plen,
358                        params.hash_log,
359                        params.chain_log,
360                        &mut hash_short,
361                        &mut hash_long,
362                    );
363                    let mut offset = 0;
364                    while offset < input.len() {
365                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
366                        let is_last = offset + chunk_size >= input.len();
367                        dfast::compress_dfast_block(
368                            &combined,
369                            plen + offset,
370                            plen + offset + chunk_size,
371                            &params,
372                            &rep_offsets,
373                            &mut hash_short,
374                            &mut hash_long,
375                            &mut sequences,
376                        );
377                        block_encoder::encode_compressed_block(
378                            &input[offset..offset + chunk_size],
379                            &sequences,
380                            &mut rep_offsets,
381                            is_last,
382                            &mut output,
383                            &mut workspace,
384                        );
385                        offset += chunk_size;
386                    }
387                }
388            }
389        }
390    }
391
392    let hash = xxh64(input, 0);
393    let checksum = (hash & 0xFFFFFFFF) as u32;
394    output.extend_from_slice(&checksum.to_le_bytes());
395
396    Ok(output)
397}
398
399pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
400    let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
401    clamp_params_to_src_size(&mut params, input.len());
402    let mut buf = Vec::with_capacity(output.len());
403    compress_frame(input, &params, &mut buf);
404    if buf.len() > output.len() {
405        return Err(CompressError::OutputTooSmall);
406    }
407    output[..buf.len()].copy_from_slice(&buf);
408    Ok(buf.len())
409}