// zrip_encode/lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3#![cfg_attr(feature = "paranoid", forbid(unsafe_code))]
4
5#[cfg(feature = "alloc")]
6extern crate alloc;
7
8pub(crate) mod block_encoder;
9#[cfg(feature = "std")]
10pub mod context;
11pub(crate) mod dfast;
12pub(crate) mod fast;
13pub(crate) mod primitives;
14pub(crate) mod sequences;
15pub mod strategy;
16#[cfg(feature = "std")]
17pub mod streaming;
18
19#[cfg(feature = "alloc")]
20use alloc::vec;
21#[cfg(feature = "alloc")]
22use alloc::vec::Vec;
23
24use crate::strategy::Strategy;
25use zrip_core::error::CompressError;
26use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
27use zrip_core::xxhash::xxh64;
28
29pub(crate) fn write_frame_header(output: &mut Vec<u8>, content_size: usize, dict_id: Option<u32>) {
30    output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
31
32    let fcs_size = if content_size <= 255 {
33        1
34    } else if content_size <= 0xFFFF + 256 {
35        2
36    } else if content_size <= 0xFFFF_FFFF {
37        4
38    } else {
39        8
40    };
41    let fcs_flag: u8 = match fcs_size {
42        1 => 0,
43        2 => 1,
44        4 => 2,
45        _ => 3,
46    };
47
48    let dict_id_flag: u8 = match dict_id {
49        None => 0,
50        Some(id) if id <= 0xFF => 1,
51        Some(id) if id <= 0xFFFF => 2,
52        Some(_) => 3,
53    };
54
55    let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
56    output.push(descriptor);
57
58    match dict_id {
59        Some(id) if id <= 0xFF => output.push(id as u8),
60        Some(id) if id <= 0xFFFF => output.extend_from_slice(&(id as u16).to_le_bytes()),
61        Some(id) => output.extend_from_slice(&id.to_le_bytes()),
62        None => {}
63    }
64
65    match fcs_size {
66        1 => output.push(content_size as u8),
67        2 => {
68            let v = (content_size - 256) as u16;
69            output.extend_from_slice(&v.to_le_bytes());
70        }
71        4 => output.extend_from_slice(&(content_size as u32).to_le_bytes()),
72        _ => output.extend_from_slice(&(content_size as u64).to_le_bytes()),
73    }
74}
75
/// Cheap heuristic that guesses whether `data` is not worth compressing.
///
/// Only the first `SAMPLE` (1024) bytes are inspected. Returns `true` when that sample
/// contains at least `DISTINCT_THRESHOLD` (200) different byte values and no single value
/// occurs more than `SAMPLE / MAX_FREQ_DENOM` (42) times. Inputs shorter than `SAMPLE`
/// always return `false`.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: usize = 200;
    const MAX_FREQ_DENOM: usize = 24;

    let sample = match data.get(..SAMPLE) {
        Some(prefix) => prefix,
        None => return false,
    };

    // A u16 counter cannot overflow: at most SAMPLE (1024) increments in total.
    let mut histogram = [0u16; 256];
    for &byte in sample {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&count| count != 0).count();
    let max_freq = histogram.iter().copied().max().unwrap_or(0);

    distinct >= DISTINCT_THRESHOLD && usize::from(max_freq) <= SAMPLE / MAX_FREQ_DENOM
}
95
96pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
97    if src_len >= 2 {
98        let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
99        params.hash_log = params.hash_log.min(src_log);
100        params.chain_log = params.chain_log.min(src_log);
101        params.window_log = params.window_log.min(src_log);
102    }
103}
104
105pub fn compress_with_params(
106    input: &[u8],
107    params: &strategy::LevelParams,
108) -> Result<Vec<u8>, CompressError> {
109    let mut params = *params;
110    clamp_params_to_src_size(&mut params, input.len());
111    compress_inner(input, &params)
112}
113
114pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
115    let params = strategy::level_params_for_size(level, input.len())
116        .ok_or(CompressError::InvalidLevel(level))?;
117    compress_inner(input, &params)
118}
119
120#[allow(clippy::unnecessary_wraps)]
121fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
122    let mut output = Vec::with_capacity(input.len() + 32);
123    compress_frame(input, params, &mut output);
124    Ok(output)
125}
126
127fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
128    write_frame_header(output, input.len(), None);
129
130    if input.is_empty() {
131        block_encoder::encode_raw_block(&[], true, output);
132    } else {
133        let hash_size = 1usize << params.hash_log;
134        let mut rep_offsets = [1u32, 4, 8];
135        let mut offset = 0;
136        let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
137        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
138
139        match params.strategy {
140            Strategy::Fast => {
141                let mut hash_table = vec![0u32; hash_size];
142                while offset < input.len() {
143                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
144                    let block_end = offset + chunk_size;
145                    let is_last = block_end >= input.len();
146
147                    if block_looks_incompressible(&input[offset..block_end]) {
148                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
149                    } else {
150                        fast::compress_fast_block(
151                            input,
152                            offset,
153                            block_end,
154                            params,
155                            &rep_offsets,
156                            &mut hash_table,
157                            &mut sequences,
158                        );
159                        if params.force_raw_literals {
160                            block_encoder::encode_compressed_block_raw(
161                                &input[offset..block_end],
162                                &sequences,
163                                &mut rep_offsets,
164                                is_last,
165                                output,
166                                &mut workspace,
167                            );
168                        } else {
169                            block_encoder::encode_compressed_block(
170                                &input[offset..block_end],
171                                &sequences,
172                                &mut rep_offsets,
173                                is_last,
174                                output,
175                                &mut workspace,
176                            );
177                        }
178                    }
179                    offset = block_end;
180                }
181            }
182            Strategy::DFast => {
183                let short_size = 1usize << params.chain_log;
184                let long_size = 1usize << params.hash_log;
185                let mut hash_short = vec![0u32; short_size];
186                let mut hash_long = vec![0u32; long_size];
187                while offset < input.len() {
188                    let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
189                    let block_end = offset + chunk_size;
190                    let is_last = block_end >= input.len();
191
192                    if block_looks_incompressible(&input[offset..block_end]) {
193                        block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
194                    } else {
195                        dfast::compress_dfast_block(
196                            input,
197                            offset,
198                            block_end,
199                            params,
200                            &rep_offsets,
201                            &mut hash_short,
202                            &mut hash_long,
203                            &mut sequences,
204                        );
205                        block_encoder::encode_compressed_block(
206                            &input[offset..block_end],
207                            &sequences,
208                            &mut rep_offsets,
209                            is_last,
210                            output,
211                            &mut workspace,
212                        );
213                    }
214                    offset = block_end;
215                }
216            }
217        }
218    }
219
220    let hash = xxh64(input, 0);
221    let checksum = (hash & 0xFFFF_FFFF) as u32;
222    output.extend_from_slice(&checksum.to_le_bytes());
223}
224
225pub fn compress_with_dict(
226    input: &[u8],
227    level: i32,
228    dict: &zrip_core::dict::Dictionary,
229) -> Result<Vec<u8>, CompressError> {
230    let total_window = dict.content().len() + input.len();
231    let params = strategy::level_params_for_size(level, total_window)
232        .ok_or(CompressError::InvalidLevel(level))?;
233
234    let mut output = Vec::with_capacity(input.len() + 32);
235    write_frame_header(&mut output, input.len(), Some(dict.id()));
236
237    if input.is_empty() {
238        block_encoder::encode_raw_block(&[], true, &mut output);
239    } else {
240        let prefix = dict.content();
241        let mut rep_offsets = *dict.rep_offsets();
242        let mut workspace = block_encoder::BlockEncodeWorkspace::new();
243
244        workspace.prev_ll = dict
245            .ll_table()
246            .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 35));
247        workspace.prev_of = dict
248            .of_table()
249            .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 31));
250        workspace.prev_ml = dict
251            .ml_table()
252            .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 52));
253        workspace.prev_huffman = dict.huf_table().and_then(|(dt, tl)| {
254            zrip_core::huffman::encode::HuffmanEncodeTable::from_decode_table(dt, tl)
255        });
256
257        if input.len() <= MAX_BLOCK_SIZE {
258            let sequences = match params.strategy {
259                Strategy::Fast => {
260                    fast::compress_fast_with_prefix(input, &params, &rep_offsets, prefix)
261                }
262                Strategy::DFast => {
263                    dfast::compress_dfast_with_prefix(input, &params, &rep_offsets, prefix)
264                }
265            };
266            if params.force_raw_literals {
267                block_encoder::encode_compressed_block_raw(
268                    input,
269                    &sequences,
270                    &mut rep_offsets,
271                    true,
272                    &mut output,
273                    &mut workspace,
274                );
275            } else {
276                block_encoder::encode_compressed_block(
277                    input,
278                    &sequences,
279                    &mut rep_offsets,
280                    true,
281                    &mut output,
282                    &mut workspace,
283                );
284            }
285        } else {
286            let mut combined = Vec::with_capacity(prefix.len() + input.len());
287            combined.extend_from_slice(prefix);
288            combined.extend_from_slice(input);
289            let plen = prefix.len();
290            let hash_size = 1usize << params.hash_log;
291            let mut sequences = Vec::new();
292
293            match params.strategy {
294                Strategy::Fast => {
295                    let mut hash_table = vec![0u32; hash_size];
296                    fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
297                    let mut offset = 0;
298                    while offset < input.len() {
299                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
300                        let is_last = offset + chunk_size >= input.len();
301                        fast::compress_fast_block(
302                            &combined,
303                            plen + offset,
304                            plen + offset + chunk_size,
305                            &params,
306                            &rep_offsets,
307                            &mut hash_table,
308                            &mut sequences,
309                        );
310                        if params.force_raw_literals {
311                            block_encoder::encode_compressed_block_raw(
312                                &input[offset..offset + chunk_size],
313                                &sequences,
314                                &mut rep_offsets,
315                                is_last,
316                                &mut output,
317                                &mut workspace,
318                            );
319                        } else {
320                            block_encoder::encode_compressed_block(
321                                &input[offset..offset + chunk_size],
322                                &sequences,
323                                &mut rep_offsets,
324                                is_last,
325                                &mut output,
326                                &mut workspace,
327                            );
328                        }
329                        offset += chunk_size;
330                    }
331                }
332                Strategy::DFast => {
333                    let short_size = 1usize << params.chain_log;
334                    let long_size = 1usize << params.hash_log;
335                    let mut hash_short = vec![0u32; short_size];
336                    let mut hash_long = vec![0u32; long_size];
337                    dfast::prefill_hash_tables(
338                        &combined,
339                        plen,
340                        params.hash_log,
341                        params.chain_log,
342                        params.min_match,
343                        &mut hash_short,
344                        &mut hash_long,
345                    );
346                    let mut offset = 0;
347                    while offset < input.len() {
348                        let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
349                        let is_last = offset + chunk_size >= input.len();
350                        dfast::compress_dfast_block(
351                            &combined,
352                            plen + offset,
353                            plen + offset + chunk_size,
354                            &params,
355                            &rep_offsets,
356                            &mut hash_short,
357                            &mut hash_long,
358                            &mut sequences,
359                        );
360                        block_encoder::encode_compressed_block(
361                            &input[offset..offset + chunk_size],
362                            &sequences,
363                            &mut rep_offsets,
364                            is_last,
365                            &mut output,
366                            &mut workspace,
367                        );
368                        offset += chunk_size;
369                    }
370                }
371            }
372        }
373    }
374
375    let hash = xxh64(input, 0);
376    let checksum = (hash & 0xFFFF_FFFF) as u32;
377    output.extend_from_slice(&checksum.to_le_bytes());
378
379    Ok(output)
380}
381
382pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
383    let params = strategy::level_params_for_size(level, input.len())
384        .ok_or(CompressError::InvalidLevel(level))?;
385    let mut buf = Vec::with_capacity(output.len());
386    compress_frame(input, &params, &mut buf);
387    if buf.len() > output.len() {
388        return Err(CompressError::OutputTooSmall);
389    }
390    output[..buf.len()].copy_from_slice(&buf);
391    Ok(buf.len())
392}