1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod primitives;
13pub(crate) mod sequences;
14pub mod strategy;
15#[cfg(feature = "std")]
16pub mod streaming;
17
18#[cfg(feature = "alloc")]
19use alloc::vec;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22
23use crate::strategy::Strategy;
24use zrip_core::error::CompressError;
25use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
26use zrip_core::xxhash::xxh64;
27
/// Cheap heuristic that guesses whether a block is worth running through the
/// match finder.
///
/// Only the first 1024 bytes of `data` are inspected. The block is reported
/// as incompressible when that sample contains at least 200 distinct byte
/// values and no single byte value occurs more than `1024 / 24` (= 42) times.
/// Inputs shorter than the sample size always yield `false`.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // Too little data to sample: never classify it as incompressible.
    let sample = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // Byte histogram of the sample; u16 is wide enough for counts up to 1024.
    let mut histogram = [0u16; 256];
    for &byte in sample {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&n| n != 0).count() as u32;
    let peak = histogram.iter().copied().max().unwrap_or(0);

    distinct >= DISTINCT_THRESHOLD && u32::from(peak) <= SAMPLE as u32 / MAX_FREQ_DENOM
}
47
48pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
49 if src_len >= 2 {
50 let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
51 params.hash_log = params.hash_log.min(src_log);
52 params.chain_log = params.chain_log.min(src_log);
53 params.window_log = params.window_log.min(src_log);
54 }
55}
56
57pub fn compress_with_params(
58 input: &[u8],
59 params: &strategy::LevelParams,
60) -> Result<Vec<u8>, CompressError> {
61 let mut params = *params;
62 clamp_params_to_src_size(&mut params, input.len());
63 compress_inner(input, ¶ms)
64}
65
66pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
67 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
68 clamp_params_to_src_size(&mut params, input.len());
69 compress_inner(input, ¶ms)
70}
71
72fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
73 let mut output = Vec::with_capacity(input.len() + 32);
74 compress_frame(input, params, &mut output);
75 Ok(output)
76}
77
78fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
79 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
80
81 let fcs_size = if input.len() <= 255 {
82 1
83 } else if input.len() <= 0xFFFF + 256 {
84 2
85 } else if input.len() <= 0xFFFFFFFF {
86 4
87 } else {
88 8
89 };
90
91 let fcs_flag = match fcs_size {
92 1 => 0,
93 2 => 1,
94 4 => 2,
95 8 => 3,
96 _ => unreachable!(),
97 };
98
99 let descriptor = 0x20 | 0x04 | (fcs_flag << 6);
100 output.push(descriptor);
101
102 match fcs_size {
103 1 => output.push(input.len() as u8),
104 2 => {
105 let v = (input.len() - 256) as u16;
106 output.extend_from_slice(&v.to_le_bytes());
107 }
108 4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
109 8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
110 _ => unreachable!(),
111 }
112
113 if input.is_empty() {
114 block_encoder::encode_raw_block(&[], true, output);
115 } else {
116 let hash_size = 1usize << params.hash_log;
117 let mut rep_offsets = [1u32, 4, 8];
118 let mut offset = 0;
119 let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
120 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
121
122 match params.strategy {
123 Strategy::Fast => {
124 let mut hash_table = vec![0u32; hash_size];
125 while offset < input.len() {
126 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
127 let block_end = offset + chunk_size;
128 let is_last = block_end >= input.len();
129
130 if block_looks_incompressible(&input[offset..block_end]) {
131 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
132 } else {
133 fast::compress_fast_block(
134 input,
135 offset,
136 block_end,
137 params,
138 &rep_offsets,
139 &mut hash_table,
140 &mut sequences,
141 );
142 if params.force_raw_literals {
143 block_encoder::encode_compressed_block_raw(
144 &input[offset..block_end],
145 &sequences,
146 &mut rep_offsets,
147 is_last,
148 output,
149 &mut workspace,
150 );
151 } else {
152 block_encoder::encode_compressed_block(
153 &input[offset..block_end],
154 &sequences,
155 &mut rep_offsets,
156 is_last,
157 output,
158 &mut workspace,
159 );
160 }
161 }
162 offset = block_end;
163 }
164 }
165 Strategy::DFast => {
166 let short_size = 1usize << params.chain_log;
167 let long_size = 1usize << params.hash_log;
168 let mut hash_short = vec![0u32; short_size];
169 let mut hash_long = vec![0u32; long_size];
170 while offset < input.len() {
171 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
172 let block_end = offset + chunk_size;
173 let is_last = block_end >= input.len();
174
175 if block_looks_incompressible(&input[offset..block_end]) {
176 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
177 } else {
178 dfast::compress_dfast_block(
179 input,
180 offset,
181 block_end,
182 params,
183 &rep_offsets,
184 &mut hash_short,
185 &mut hash_long,
186 &mut sequences,
187 );
188 block_encoder::encode_compressed_block(
189 &input[offset..block_end],
190 &sequences,
191 &mut rep_offsets,
192 is_last,
193 output,
194 &mut workspace,
195 );
196 }
197 offset = block_end;
198 }
199 }
200 }
201 }
202
203 let hash = xxh64(input, 0);
204 let checksum = (hash & 0xFFFFFFFF) as u32;
205 output.extend_from_slice(&checksum.to_le_bytes());
206}
207
208pub fn compress_with_dict(
209 input: &[u8],
210 level: i32,
211 dict: &zrip_core::dict::Dictionary,
212) -> Result<Vec<u8>, CompressError> {
213 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
214 clamp_params_to_src_size(&mut params, input.len());
215
216 let mut output = Vec::with_capacity(input.len() + 32);
217
218 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
219
220 let fcs_size = if input.len() <= 255 {
221 1
222 } else if input.len() <= 0xFFFF + 256 {
223 2
224 } else if input.len() <= 0xFFFFFFFF {
225 4
226 } else {
227 8
228 };
229
230 let fcs_flag = match fcs_size {
231 1 => 0,
232 2 => 1,
233 4 => 2,
234 8 => 3,
235 _ => unreachable!(),
236 };
237
238 let dict_id = dict.id();
239 let dict_id_flag = if dict_id <= 0xFF {
240 1u8
241 } else if dict_id <= 0xFFFF {
242 2
243 } else {
244 3
245 };
246
247 let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
248 output.push(descriptor);
249
250 match dict_id_flag {
251 1 => output.push(dict_id as u8),
252 2 => output.extend_from_slice(&(dict_id as u16).to_le_bytes()),
253 3 => output.extend_from_slice(&dict_id.to_le_bytes()),
254 _ => unreachable!(),
255 }
256
257 match fcs_size {
258 1 => output.push(input.len() as u8),
259 2 => {
260 let v = (input.len() - 256) as u16;
261 output.extend_from_slice(&v.to_le_bytes());
262 }
263 4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
264 8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
265 _ => unreachable!(),
266 }
267
268 if input.is_empty() {
269 block_encoder::encode_raw_block(&[], true, &mut output);
270 } else {
271 let prefix = dict.content();
272 let mut rep_offsets = *dict.rep_offsets();
273 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
274
275 if input.len() <= MAX_BLOCK_SIZE {
276 let sequences = match params.strategy {
277 Strategy::Fast => {
278 fast::compress_fast_with_prefix(input, ¶ms, &rep_offsets, prefix)
279 }
280 Strategy::DFast => {
281 dfast::compress_dfast_with_prefix(input, ¶ms, &rep_offsets, prefix)
282 }
283 };
284 if params.force_raw_literals {
285 block_encoder::encode_compressed_block_raw(
286 input,
287 &sequences,
288 &mut rep_offsets,
289 true,
290 &mut output,
291 &mut workspace,
292 );
293 } else {
294 block_encoder::encode_compressed_block(
295 input,
296 &sequences,
297 &mut rep_offsets,
298 true,
299 &mut output,
300 &mut workspace,
301 );
302 }
303 } else {
304 let mut combined = Vec::with_capacity(prefix.len() + input.len());
305 combined.extend_from_slice(prefix);
306 combined.extend_from_slice(input);
307 let plen = prefix.len();
308 let hash_size = 1usize << params.hash_log;
309 let mut sequences = Vec::new();
310
311 match params.strategy {
312 Strategy::Fast => {
313 let mut hash_table = vec![0u32; hash_size];
314 fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
315 let mut offset = 0;
316 while offset < input.len() {
317 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
318 let is_last = offset + chunk_size >= input.len();
319 fast::compress_fast_block(
320 &combined,
321 plen + offset,
322 plen + offset + chunk_size,
323 ¶ms,
324 &rep_offsets,
325 &mut hash_table,
326 &mut sequences,
327 );
328 if params.force_raw_literals {
329 block_encoder::encode_compressed_block_raw(
330 &input[offset..offset + chunk_size],
331 &sequences,
332 &mut rep_offsets,
333 is_last,
334 &mut output,
335 &mut workspace,
336 );
337 } else {
338 block_encoder::encode_compressed_block(
339 &input[offset..offset + chunk_size],
340 &sequences,
341 &mut rep_offsets,
342 is_last,
343 &mut output,
344 &mut workspace,
345 );
346 }
347 offset += chunk_size;
348 }
349 }
350 Strategy::DFast => {
351 let short_size = 1usize << params.chain_log;
352 let long_size = 1usize << params.hash_log;
353 let mut hash_short = vec![0u32; short_size];
354 let mut hash_long = vec![0u32; long_size];
355 dfast::prefill_hash_tables(
356 &combined,
357 plen,
358 params.hash_log,
359 params.chain_log,
360 &mut hash_short,
361 &mut hash_long,
362 );
363 let mut offset = 0;
364 while offset < input.len() {
365 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
366 let is_last = offset + chunk_size >= input.len();
367 dfast::compress_dfast_block(
368 &combined,
369 plen + offset,
370 plen + offset + chunk_size,
371 ¶ms,
372 &rep_offsets,
373 &mut hash_short,
374 &mut hash_long,
375 &mut sequences,
376 );
377 block_encoder::encode_compressed_block(
378 &input[offset..offset + chunk_size],
379 &sequences,
380 &mut rep_offsets,
381 is_last,
382 &mut output,
383 &mut workspace,
384 );
385 offset += chunk_size;
386 }
387 }
388 }
389 }
390 }
391
392 let hash = xxh64(input, 0);
393 let checksum = (hash & 0xFFFFFFFF) as u32;
394 output.extend_from_slice(&checksum.to_le_bytes());
395
396 Ok(output)
397}
398
399pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
400 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
401 clamp_params_to_src_size(&mut params, input.len());
402 let mut buf = Vec::with_capacity(output.len());
403 compress_frame(input, ¶ms, &mut buf);
404 if buf.len() > output.len() {
405 return Err(CompressError::OutputTooSmall);
406 }
407 output[..buf.len()].copy_from_slice(&buf);
408 Ok(buf.len())
409}