1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod primitives;
13pub(crate) mod sequences;
14pub mod strategy;
15#[cfg(feature = "std")]
16pub mod streaming;
17
18#[cfg(feature = "alloc")]
19use alloc::vec;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22
23use crate::strategy::Strategy;
24use zrip_core::error::CompressError;
25use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
26use zrip_core::xxhash::xxh64;
27
/// Cheap heuristic that guesses whether a block is not worth compressing.
///
/// Builds a byte histogram over the first `SAMPLE` (1024) bytes of `data` and
/// reports `true` when the sample both
///
/// * contains at least `DISTINCT_THRESHOLD` (200) different byte values, and
/// * has no single byte value occurring more than `SAMPLE / MAX_FREQ_DENOM`
///   (1024 / 24 = 42) times.
///
/// Inputs shorter than `SAMPLE` bytes are never flagged. Bytes past the
/// sample window are not inspected.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // Too little data to judge: report "compressible" so the caller tries.
    let sample = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // A u16 counter cannot overflow: at most SAMPLE (1024) increments in total.
    let mut histogram = [0u16; 256];
    for &byte in sample {
        histogram[usize::from(byte)] += 1;
    }

    // One pass over the histogram yields both statistics.
    let (distinct, peak) = histogram
        .iter()
        .fold((0u32, 0u16), |(seen, highest), &count| {
            (seen + u32::from(count > 0), highest.max(count))
        });

    if distinct < DISTINCT_THRESHOLD {
        return false;
    }
    u32::from(peak) <= SAMPLE as u32 / MAX_FREQ_DENOM
}
47
48pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
49 if src_len >= 2 {
50 let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
51 params.hash_log = params.hash_log.min(src_log);
52 params.chain_log = params.chain_log.min(src_log);
53 params.window_log = params.window_log.min(src_log);
54 }
55}
56
57pub fn compress_with_params(
58 input: &[u8],
59 params: &strategy::LevelParams,
60) -> Result<Vec<u8>, CompressError> {
61 let mut params = *params;
62 clamp_params_to_src_size(&mut params, input.len());
63 compress_inner(input, ¶ms)
64}
65
66pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
67 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
68 clamp_params_to_src_size(&mut params, input.len());
69 compress_inner(input, ¶ms)
70}
71
72#[allow(clippy::unnecessary_wraps)]
73fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
74 let mut output = Vec::with_capacity(input.len() + 32);
75 compress_frame(input, params, &mut output);
76 Ok(output)
77}
78
79fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
80 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
81
82 let fcs_size = if input.len() <= 255 {
83 1
84 } else if input.len() <= 0xFFFF + 256 {
85 2
86 } else if input.len() <= 0xFFFF_FFFF {
87 4
88 } else {
89 8
90 };
91
92 let fcs_flag = match fcs_size {
93 1 => 0,
94 2 => 1,
95 4 => 2,
96 8 => 3,
97 _ => unreachable!(),
98 };
99
100 let descriptor = 0x20 | 0x04 | (fcs_flag << 6);
101 output.push(descriptor);
102
103 match fcs_size {
104 1 => output.push(input.len() as u8),
105 2 => {
106 let v = (input.len() - 256) as u16;
107 output.extend_from_slice(&v.to_le_bytes());
108 }
109 4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
110 8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
111 _ => unreachable!(),
112 }
113
114 if input.is_empty() {
115 block_encoder::encode_raw_block(&[], true, output);
116 } else {
117 let hash_size = 1usize << params.hash_log;
118 let mut rep_offsets = [1u32, 4, 8];
119 let mut offset = 0;
120 let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
121 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
122
123 match params.strategy {
124 Strategy::Fast => {
125 let mut hash_table = vec![0u32; hash_size];
126 while offset < input.len() {
127 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
128 let block_end = offset + chunk_size;
129 let is_last = block_end >= input.len();
130
131 if block_looks_incompressible(&input[offset..block_end]) {
132 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
133 } else {
134 fast::compress_fast_block(
135 input,
136 offset,
137 block_end,
138 params,
139 &rep_offsets,
140 &mut hash_table,
141 &mut sequences,
142 );
143 if params.force_raw_literals {
144 block_encoder::encode_compressed_block_raw(
145 &input[offset..block_end],
146 &sequences,
147 &mut rep_offsets,
148 is_last,
149 output,
150 &mut workspace,
151 );
152 } else {
153 block_encoder::encode_compressed_block(
154 &input[offset..block_end],
155 &sequences,
156 &mut rep_offsets,
157 is_last,
158 output,
159 &mut workspace,
160 );
161 }
162 }
163 offset = block_end;
164 }
165 }
166 Strategy::DFast => {
167 let short_size = 1usize << params.chain_log;
168 let long_size = 1usize << params.hash_log;
169 let mut hash_short = vec![0u32; short_size];
170 let mut hash_long = vec![0u32; long_size];
171 while offset < input.len() {
172 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
173 let block_end = offset + chunk_size;
174 let is_last = block_end >= input.len();
175
176 if block_looks_incompressible(&input[offset..block_end]) {
177 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
178 } else {
179 dfast::compress_dfast_block(
180 input,
181 offset,
182 block_end,
183 params,
184 &rep_offsets,
185 &mut hash_short,
186 &mut hash_long,
187 &mut sequences,
188 );
189 block_encoder::encode_compressed_block(
190 &input[offset..block_end],
191 &sequences,
192 &mut rep_offsets,
193 is_last,
194 output,
195 &mut workspace,
196 );
197 }
198 offset = block_end;
199 }
200 }
201 }
202 }
203
204 let hash = xxh64(input, 0);
205 let checksum = (hash & 0xFFFF_FFFF) as u32;
206 output.extend_from_slice(&checksum.to_le_bytes());
207}
208
209pub fn compress_with_dict(
210 input: &[u8],
211 level: i32,
212 dict: &zrip_core::dict::Dictionary,
213) -> Result<Vec<u8>, CompressError> {
214 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
215 clamp_params_to_src_size(&mut params, input.len());
216
217 let mut output = Vec::with_capacity(input.len() + 32);
218
219 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
220
221 let fcs_size = if input.len() <= 255 {
222 1
223 } else if input.len() <= 0xFFFF + 256 {
224 2
225 } else if input.len() <= 0xFFFF_FFFF {
226 4
227 } else {
228 8
229 };
230
231 let fcs_flag = match fcs_size {
232 1 => 0,
233 2 => 1,
234 4 => 2,
235 8 => 3,
236 _ => unreachable!(),
237 };
238
239 let dict_id = dict.id();
240 let dict_id_flag = if dict_id <= 0xFF {
241 1u8
242 } else if dict_id <= 0xFFFF {
243 2
244 } else {
245 3
246 };
247
248 let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
249 output.push(descriptor);
250
251 match dict_id_flag {
252 1 => output.push(dict_id as u8),
253 2 => output.extend_from_slice(&(dict_id as u16).to_le_bytes()),
254 3 => output.extend_from_slice(&dict_id.to_le_bytes()),
255 _ => unreachable!(),
256 }
257
258 match fcs_size {
259 1 => output.push(input.len() as u8),
260 2 => {
261 let v = (input.len() - 256) as u16;
262 output.extend_from_slice(&v.to_le_bytes());
263 }
264 4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
265 8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
266 _ => unreachable!(),
267 }
268
269 if input.is_empty() {
270 block_encoder::encode_raw_block(&[], true, &mut output);
271 } else {
272 let prefix = dict.content();
273 let mut rep_offsets = *dict.rep_offsets();
274 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
275
276 if input.len() <= MAX_BLOCK_SIZE {
277 let sequences = match params.strategy {
278 Strategy::Fast => {
279 fast::compress_fast_with_prefix(input, ¶ms, &rep_offsets, prefix)
280 }
281 Strategy::DFast => {
282 dfast::compress_dfast_with_prefix(input, ¶ms, &rep_offsets, prefix)
283 }
284 };
285 if params.force_raw_literals {
286 block_encoder::encode_compressed_block_raw(
287 input,
288 &sequences,
289 &mut rep_offsets,
290 true,
291 &mut output,
292 &mut workspace,
293 );
294 } else {
295 block_encoder::encode_compressed_block(
296 input,
297 &sequences,
298 &mut rep_offsets,
299 true,
300 &mut output,
301 &mut workspace,
302 );
303 }
304 } else {
305 let mut combined = Vec::with_capacity(prefix.len() + input.len());
306 combined.extend_from_slice(prefix);
307 combined.extend_from_slice(input);
308 let plen = prefix.len();
309 let hash_size = 1usize << params.hash_log;
310 let mut sequences = Vec::new();
311
312 match params.strategy {
313 Strategy::Fast => {
314 let mut hash_table = vec![0u32; hash_size];
315 fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
316 let mut offset = 0;
317 while offset < input.len() {
318 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
319 let is_last = offset + chunk_size >= input.len();
320 fast::compress_fast_block(
321 &combined,
322 plen + offset,
323 plen + offset + chunk_size,
324 ¶ms,
325 &rep_offsets,
326 &mut hash_table,
327 &mut sequences,
328 );
329 if params.force_raw_literals {
330 block_encoder::encode_compressed_block_raw(
331 &input[offset..offset + chunk_size],
332 &sequences,
333 &mut rep_offsets,
334 is_last,
335 &mut output,
336 &mut workspace,
337 );
338 } else {
339 block_encoder::encode_compressed_block(
340 &input[offset..offset + chunk_size],
341 &sequences,
342 &mut rep_offsets,
343 is_last,
344 &mut output,
345 &mut workspace,
346 );
347 }
348 offset += chunk_size;
349 }
350 }
351 Strategy::DFast => {
352 let short_size = 1usize << params.chain_log;
353 let long_size = 1usize << params.hash_log;
354 let mut hash_short = vec![0u32; short_size];
355 let mut hash_long = vec![0u32; long_size];
356 dfast::prefill_hash_tables(
357 &combined,
358 plen,
359 params.hash_log,
360 params.chain_log,
361 &mut hash_short,
362 &mut hash_long,
363 );
364 let mut offset = 0;
365 while offset < input.len() {
366 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
367 let is_last = offset + chunk_size >= input.len();
368 dfast::compress_dfast_block(
369 &combined,
370 plen + offset,
371 plen + offset + chunk_size,
372 ¶ms,
373 &rep_offsets,
374 &mut hash_short,
375 &mut hash_long,
376 &mut sequences,
377 );
378 block_encoder::encode_compressed_block(
379 &input[offset..offset + chunk_size],
380 &sequences,
381 &mut rep_offsets,
382 is_last,
383 &mut output,
384 &mut workspace,
385 );
386 offset += chunk_size;
387 }
388 }
389 }
390 }
391 }
392
393 let hash = xxh64(input, 0);
394 let checksum = (hash & 0xFFFF_FFFF) as u32;
395 output.extend_from_slice(&checksum.to_le_bytes());
396
397 Ok(output)
398}
399
400pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
401 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
402 clamp_params_to_src_size(&mut params, input.len());
403 let mut buf = Vec::with_capacity(output.len());
404 compress_frame(input, ¶ms, &mut buf);
405 if buf.len() > output.len() {
406 return Err(CompressError::OutputTooSmall);
407 }
408 output[..buf.len()].copy_from_slice(&buf);
409 Ok(buf.len())
410}