1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod sequences;
13pub mod strategy;
14#[cfg(feature = "std")]
15pub mod streaming;
16
17#[cfg(feature = "alloc")]
18use alloc::vec::Vec;
19
20use crate::strategy::Strategy;
21use zrip_core::error::CompressError;
22use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
23use zrip_core::xxhash::xxh64;
24
/// Cheap heuristic that guesses whether a block is not worth compressing.
///
/// Only the first `SAMPLE` (1024) bytes of `data` are examined. The block is
/// reported as incompressible when that sample contains at least
/// `DISTINCT_THRESHOLD` (200) different byte values *and* no single byte value
/// occurs more than `SAMPLE / MAX_FREQ_DENOM` (1024 / 24 = 42) times.
///
/// Inputs shorter than `SAMPLE` bytes always return `false`.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // Too little data to sample: never flag it.
    let head = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // Byte histogram of the sample; a count can reach at most SAMPLE, which fits in u16.
    let mut histogram = [0u16; 256];
    for &byte in head {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&n| n != 0).count() as u32;
    if distinct < DISTINCT_THRESHOLD {
        return false;
    }

    let peak = histogram.iter().copied().max().unwrap_or(0);
    u32::from(peak) <= SAMPLE as u32 / MAX_FREQ_DENOM
}
44
45pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
46 if src_len >= 2 {
47 let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
48 params.hash_log = params.hash_log.min(src_log);
49 params.chain_log = params.chain_log.min(src_log);
50 params.window_log = params.window_log.min(src_log);
51 }
52}
53
54pub fn compress_with_params(
55 input: &[u8],
56 params: &strategy::LevelParams,
57) -> Result<Vec<u8>, CompressError> {
58 let mut params = *params;
59 clamp_params_to_src_size(&mut params, input.len());
60 compress_inner(input, ¶ms)
61}
62
63pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
64 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
65 clamp_params_to_src_size(&mut params, input.len());
66 compress_inner(input, ¶ms)
67}
68
69fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
70 let mut output = Vec::with_capacity(input.len() + 32);
71 compress_frame(input, params, &mut output);
72 Ok(output)
73}
74
75fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
76 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
77
78 let fcs_size = if input.len() <= 255 {
79 1
80 } else if input.len() <= 0xFFFF + 256 {
81 2
82 } else if input.len() <= 0xFFFFFFFF {
83 4
84 } else {
85 8
86 };
87
88 let fcs_flag = match fcs_size {
89 1 => 0,
90 2 => 1,
91 4 => 2,
92 8 => 3,
93 _ => unreachable!(),
94 };
95
96 let descriptor = 0x20 | 0x04 | (fcs_flag << 6);
97 output.push(descriptor);
98
99 match fcs_size {
100 1 => output.push(input.len() as u8),
101 2 => {
102 let v = (input.len() - 256) as u16;
103 output.extend_from_slice(&v.to_le_bytes());
104 }
105 4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
106 8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
107 _ => unreachable!(),
108 }
109
110 if input.is_empty() {
111 block_encoder::encode_raw_block(&[], true, output);
112 } else {
113 let hash_size = 1usize << params.hash_log;
114 let mut rep_offsets = [1u32, 4, 8];
115 let mut offset = 0;
116 let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
117 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
118
119 match params.strategy {
120 Strategy::Fast => {
121 let mut hash_table = vec![0u32; hash_size];
122 while offset < input.len() {
123 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
124 let block_end = offset + chunk_size;
125 let is_last = block_end >= input.len();
126
127 if block_looks_incompressible(&input[offset..block_end]) {
128 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
129 } else {
130 fast::compress_fast_block(
131 input,
132 offset,
133 block_end,
134 params,
135 &rep_offsets,
136 &mut hash_table,
137 &mut sequences,
138 );
139 if params.force_raw_literals {
140 block_encoder::encode_compressed_block_raw(
141 &input[offset..block_end],
142 &sequences,
143 &mut rep_offsets,
144 is_last,
145 output,
146 &mut workspace,
147 );
148 } else {
149 block_encoder::encode_compressed_block(
150 &input[offset..block_end],
151 &sequences,
152 &mut rep_offsets,
153 is_last,
154 output,
155 &mut workspace,
156 );
157 }
158 }
159 offset = block_end;
160 }
161 }
162 Strategy::DFast => {
163 let short_size = 1usize << params.chain_log;
164 let long_size = 1usize << params.hash_log;
165 let mut hash_short = vec![0u32; short_size];
166 let mut hash_long = vec![0u32; long_size];
167 while offset < input.len() {
168 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
169 let block_end = offset + chunk_size;
170 let is_last = block_end >= input.len();
171
172 if block_looks_incompressible(&input[offset..block_end]) {
173 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
174 } else {
175 dfast::compress_dfast_block(
176 input,
177 offset,
178 block_end,
179 params,
180 &rep_offsets,
181 &mut hash_short,
182 &mut hash_long,
183 &mut sequences,
184 );
185 block_encoder::encode_compressed_block(
186 &input[offset..block_end],
187 &sequences,
188 &mut rep_offsets,
189 is_last,
190 output,
191 &mut workspace,
192 );
193 }
194 offset = block_end;
195 }
196 }
197 }
198 }
199
200 let hash = xxh64(input, 0);
201 let checksum = (hash & 0xFFFFFFFF) as u32;
202 output.extend_from_slice(&checksum.to_le_bytes());
203}
204
205pub fn compress_with_dict(
206 input: &[u8],
207 level: i32,
208 dict: &zrip_core::dict::Dictionary,
209) -> Result<Vec<u8>, CompressError> {
210 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
211 clamp_params_to_src_size(&mut params, input.len());
212
213 let mut output = Vec::with_capacity(input.len() + 32);
214
215 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
216
217 let fcs_size = if input.len() <= 255 {
218 1
219 } else if input.len() <= 0xFFFF + 256 {
220 2
221 } else if input.len() <= 0xFFFFFFFF {
222 4
223 } else {
224 8
225 };
226
227 let fcs_flag = match fcs_size {
228 1 => 0,
229 2 => 1,
230 4 => 2,
231 8 => 3,
232 _ => unreachable!(),
233 };
234
235 let dict_id = dict.id();
236 let dict_id_flag = if dict_id <= 0xFF {
237 1u8
238 } else if dict_id <= 0xFFFF {
239 2
240 } else {
241 3
242 };
243
244 let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
245 output.push(descriptor);
246
247 match dict_id_flag {
248 1 => output.push(dict_id as u8),
249 2 => output.extend_from_slice(&(dict_id as u16).to_le_bytes()),
250 3 => output.extend_from_slice(&dict_id.to_le_bytes()),
251 _ => unreachable!(),
252 }
253
254 match fcs_size {
255 1 => output.push(input.len() as u8),
256 2 => {
257 let v = (input.len() - 256) as u16;
258 output.extend_from_slice(&v.to_le_bytes());
259 }
260 4 => output.extend_from_slice(&(input.len() as u32).to_le_bytes()),
261 8 => output.extend_from_slice(&(input.len() as u64).to_le_bytes()),
262 _ => unreachable!(),
263 }
264
265 if input.is_empty() {
266 block_encoder::encode_raw_block(&[], true, &mut output);
267 } else {
268 let prefix = dict.content();
269 let mut rep_offsets = *dict.rep_offsets();
270 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
271
272 if input.len() <= MAX_BLOCK_SIZE {
273 let sequences = match params.strategy {
274 Strategy::Fast => {
275 fast::compress_fast_with_prefix(input, ¶ms, &rep_offsets, prefix)
276 }
277 Strategy::DFast => {
278 dfast::compress_dfast_with_prefix(input, ¶ms, &rep_offsets, prefix)
279 }
280 };
281 if params.force_raw_literals {
282 block_encoder::encode_compressed_block_raw(
283 input,
284 &sequences,
285 &mut rep_offsets,
286 true,
287 &mut output,
288 &mut workspace,
289 );
290 } else {
291 block_encoder::encode_compressed_block(
292 input,
293 &sequences,
294 &mut rep_offsets,
295 true,
296 &mut output,
297 &mut workspace,
298 );
299 }
300 } else {
301 let mut combined = Vec::with_capacity(prefix.len() + input.len());
302 combined.extend_from_slice(prefix);
303 combined.extend_from_slice(input);
304 let plen = prefix.len();
305 let hash_size = 1usize << params.hash_log;
306 let mut sequences = Vec::new();
307
308 match params.strategy {
309 Strategy::Fast => {
310 let mut hash_table = vec![0u32; hash_size];
311 fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
312 let mut offset = 0;
313 while offset < input.len() {
314 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
315 let is_last = offset + chunk_size >= input.len();
316 fast::compress_fast_block(
317 &combined,
318 plen + offset,
319 plen + offset + chunk_size,
320 ¶ms,
321 &rep_offsets,
322 &mut hash_table,
323 &mut sequences,
324 );
325 if params.force_raw_literals {
326 block_encoder::encode_compressed_block_raw(
327 &input[offset..offset + chunk_size],
328 &sequences,
329 &mut rep_offsets,
330 is_last,
331 &mut output,
332 &mut workspace,
333 );
334 } else {
335 block_encoder::encode_compressed_block(
336 &input[offset..offset + chunk_size],
337 &sequences,
338 &mut rep_offsets,
339 is_last,
340 &mut output,
341 &mut workspace,
342 );
343 }
344 offset += chunk_size;
345 }
346 }
347 Strategy::DFast => {
348 let short_size = 1usize << params.chain_log;
349 let long_size = 1usize << params.hash_log;
350 let mut hash_short = vec![0u32; short_size];
351 let mut hash_long = vec![0u32; long_size];
352 dfast::prefill_hash_tables(
353 &combined,
354 plen,
355 params.hash_log,
356 params.chain_log,
357 &mut hash_short,
358 &mut hash_long,
359 );
360 let mut offset = 0;
361 while offset < input.len() {
362 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
363 let is_last = offset + chunk_size >= input.len();
364 dfast::compress_dfast_block(
365 &combined,
366 plen + offset,
367 plen + offset + chunk_size,
368 ¶ms,
369 &rep_offsets,
370 &mut hash_short,
371 &mut hash_long,
372 &mut sequences,
373 );
374 block_encoder::encode_compressed_block(
375 &input[offset..offset + chunk_size],
376 &sequences,
377 &mut rep_offsets,
378 is_last,
379 &mut output,
380 &mut workspace,
381 );
382 offset += chunk_size;
383 }
384 }
385 }
386 }
387 }
388
389 let hash = xxh64(input, 0);
390 let checksum = (hash & 0xFFFFFFFF) as u32;
391 output.extend_from_slice(&checksum.to_le_bytes());
392
393 Ok(output)
394}
395
396pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
397 let mut params = strategy::level_params(level).ok_or(CompressError::InvalidLevel(level))?;
398 clamp_params_to_src_size(&mut params, input.len());
399 let mut buf = Vec::with_capacity(output.len());
400 compress_frame(input, ¶ms, &mut buf);
401 if buf.len() > output.len() {
402 return Err(CompressError::OutputTooSmall);
403 }
404 output[..buf.len()].copy_from_slice(&buf);
405 Ok(buf.len())
406}