1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3#![cfg_attr(feature = "paranoid", forbid(unsafe_code))]
4
5#[cfg(feature = "alloc")]
6extern crate alloc;
7
8pub(crate) mod block_encoder;
9#[cfg(feature = "std")]
10pub mod context;
11pub(crate) mod dfast;
12pub(crate) mod fast;
13pub(crate) mod primitives;
14pub(crate) mod sequences;
15pub mod strategy;
16#[cfg(feature = "std")]
17pub mod streaming;
18
19#[cfg(feature = "alloc")]
20use alloc::vec;
21#[cfg(feature = "alloc")]
22use alloc::vec::Vec;
23
24use crate::strategy::Strategy;
25use zrip_core::error::CompressError;
26use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
27use zrip_core::xxhash::xxh64;
28
29pub(crate) fn write_frame_header(output: &mut Vec<u8>, content_size: usize, dict_id: Option<u32>) {
30 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
31
32 let fcs_size = if content_size <= 255 {
33 1
34 } else if content_size <= 0xFFFF + 256 {
35 2
36 } else if content_size <= 0xFFFF_FFFF {
37 4
38 } else {
39 8
40 };
41 let fcs_flag: u8 = match fcs_size {
42 1 => 0,
43 2 => 1,
44 4 => 2,
45 _ => 3,
46 };
47
48 let dict_id_flag: u8 = match dict_id {
49 None => 0,
50 Some(id) if id <= 0xFF => 1,
51 Some(id) if id <= 0xFFFF => 2,
52 Some(_) => 3,
53 };
54
55 let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
56 output.push(descriptor);
57
58 match dict_id {
59 Some(id) if id <= 0xFF => output.push(id as u8),
60 Some(id) if id <= 0xFFFF => output.extend_from_slice(&(id as u16).to_le_bytes()),
61 Some(id) => output.extend_from_slice(&id.to_le_bytes()),
62 None => {}
63 }
64
65 match fcs_size {
66 1 => output.push(content_size as u8),
67 2 => {
68 let v = (content_size - 256) as u16;
69 output.extend_from_slice(&v.to_le_bytes());
70 }
71 4 => output.extend_from_slice(&(content_size as u32).to_le_bytes()),
72 _ => output.extend_from_slice(&(content_size as u64).to_le_bytes()),
73 }
74}
75
/// Cheap byte-histogram heuristic that guesses whether a block is worth
/// trying to compress.
///
/// Only the first 1024 bytes of `data` are inspected. Returns `true` when that
/// sample contains at least 200 distinct byte values and no single value
/// occurs more than `1024 / 24` (= 42) times. Inputs shorter than 1024 bytes
/// always return `false`.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    // Too little data to sample: never flag it.
    let sample = match data.get(..SAMPLE) {
        Some(head) => head,
        None => return false,
    };

    // A u16 counter cannot overflow: at most SAMPLE (1024) increments in total.
    let mut histogram = [0u16; 256];
    for &byte in sample {
        histogram[usize::from(byte)] += 1;
    }

    let distinct = histogram.iter().filter(|&&n| n != 0).count() as u32;
    let max_freq = histogram.iter().copied().max().unwrap_or(0);
    let freq_cap = SAMPLE as u32 / MAX_FREQ_DENOM;

    distinct >= DISTINCT_THRESHOLD && u32::from(max_freq) <= freq_cap
}
95
96pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
97 if src_len >= 2 {
98 let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
99 params.hash_log = params.hash_log.min(src_log);
100 params.chain_log = params.chain_log.min(src_log);
101 params.window_log = params.window_log.min(src_log);
102 }
103}
104
105pub fn compress_with_params(
106 input: &[u8],
107 params: &strategy::LevelParams,
108) -> Result<Vec<u8>, CompressError> {
109 let mut params = *params;
110 clamp_params_to_src_size(&mut params, input.len());
111 compress_inner(input, ¶ms)
112}
113
114pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
115 let params = strategy::level_params_for_size(level, input.len())
116 .ok_or(CompressError::InvalidLevel(level))?;
117 compress_inner(input, ¶ms)
118}
119
120#[allow(clippy::unnecessary_wraps)]
121fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
122 let mut output = Vec::with_capacity(input.len() + 32);
123 compress_frame(input, params, &mut output);
124 Ok(output)
125}
126
127fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
128 write_frame_header(output, input.len(), None);
129
130 if input.is_empty() {
131 block_encoder::encode_raw_block(&[], true, output);
132 } else {
133 let hash_size = 1usize << params.hash_log;
134 let mut rep_offsets = [1u32, 4, 8];
135 let mut offset = 0;
136 let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
137 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
138
139 match params.strategy {
140 Strategy::Fast => {
141 let mut hash_table = vec![0u32; hash_size];
142 while offset < input.len() {
143 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
144 let block_end = offset + chunk_size;
145 let is_last = block_end >= input.len();
146
147 if block_looks_incompressible(&input[offset..block_end]) {
148 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
149 } else {
150 fast::compress_fast_block(
151 input,
152 offset,
153 block_end,
154 params,
155 &rep_offsets,
156 &mut hash_table,
157 &mut sequences,
158 );
159 if params.force_raw_literals {
160 block_encoder::encode_compressed_block_raw(
161 &input[offset..block_end],
162 &sequences,
163 &mut rep_offsets,
164 is_last,
165 output,
166 &mut workspace,
167 );
168 } else {
169 block_encoder::encode_compressed_block(
170 &input[offset..block_end],
171 &sequences,
172 &mut rep_offsets,
173 is_last,
174 output,
175 &mut workspace,
176 );
177 }
178 }
179 offset = block_end;
180 }
181 }
182 Strategy::DFast => {
183 let short_size = 1usize << params.chain_log;
184 let long_size = 1usize << params.hash_log;
185 let mut hash_short = vec![0u32; short_size];
186 let mut hash_long = vec![0u32; long_size];
187 while offset < input.len() {
188 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
189 let block_end = offset + chunk_size;
190 let is_last = block_end >= input.len();
191
192 if block_looks_incompressible(&input[offset..block_end]) {
193 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
194 } else {
195 dfast::compress_dfast_block(
196 input,
197 offset,
198 block_end,
199 params,
200 &rep_offsets,
201 &mut hash_short,
202 &mut hash_long,
203 &mut sequences,
204 );
205 block_encoder::encode_compressed_block(
206 &input[offset..block_end],
207 &sequences,
208 &mut rep_offsets,
209 is_last,
210 output,
211 &mut workspace,
212 );
213 }
214 offset = block_end;
215 }
216 }
217 }
218 }
219
220 let hash = xxh64(input, 0);
221 let checksum = (hash & 0xFFFF_FFFF) as u32;
222 output.extend_from_slice(&checksum.to_le_bytes());
223}
224
225pub fn compress_with_dict(
226 input: &[u8],
227 level: i32,
228 dict: &zrip_core::dict::Dictionary,
229) -> Result<Vec<u8>, CompressError> {
230 let total_window = dict.content().len() + input.len();
231 let params = strategy::level_params_for_size(level, total_window)
232 .ok_or(CompressError::InvalidLevel(level))?;
233
234 let mut output = Vec::with_capacity(input.len() + 32);
235 write_frame_header(&mut output, input.len(), Some(dict.id()));
236
237 if input.is_empty() {
238 block_encoder::encode_raw_block(&[], true, &mut output);
239 } else {
240 let prefix = dict.content();
241 let mut rep_offsets = *dict.rep_offsets();
242 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
243
244 workspace.prev_ll = dict
245 .ll_table()
246 .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 35));
247 workspace.prev_of = dict
248 .of_table()
249 .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 31));
250 workspace.prev_ml = dict
251 .ml_table()
252 .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 52));
253 workspace.prev_huffman = dict.huf_table().and_then(|(dt, tl)| {
254 zrip_core::huffman::encode::HuffmanEncodeTable::from_decode_table(dt, tl)
255 });
256
257 if input.len() <= MAX_BLOCK_SIZE {
258 let sequences = match params.strategy {
259 Strategy::Fast => {
260 fast::compress_fast_with_prefix(input, ¶ms, &rep_offsets, prefix)
261 }
262 Strategy::DFast => {
263 dfast::compress_dfast_with_prefix(input, ¶ms, &rep_offsets, prefix)
264 }
265 };
266 if params.force_raw_literals {
267 block_encoder::encode_compressed_block_raw(
268 input,
269 &sequences,
270 &mut rep_offsets,
271 true,
272 &mut output,
273 &mut workspace,
274 );
275 } else {
276 block_encoder::encode_compressed_block(
277 input,
278 &sequences,
279 &mut rep_offsets,
280 true,
281 &mut output,
282 &mut workspace,
283 );
284 }
285 } else {
286 let mut combined = Vec::with_capacity(prefix.len() + input.len());
287 combined.extend_from_slice(prefix);
288 combined.extend_from_slice(input);
289 let plen = prefix.len();
290 let hash_size = 1usize << params.hash_log;
291 let mut sequences = Vec::new();
292
293 match params.strategy {
294 Strategy::Fast => {
295 let mut hash_table = vec![0u32; hash_size];
296 fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
297 let mut offset = 0;
298 while offset < input.len() {
299 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
300 let is_last = offset + chunk_size >= input.len();
301 fast::compress_fast_block(
302 &combined,
303 plen + offset,
304 plen + offset + chunk_size,
305 ¶ms,
306 &rep_offsets,
307 &mut hash_table,
308 &mut sequences,
309 );
310 if params.force_raw_literals {
311 block_encoder::encode_compressed_block_raw(
312 &input[offset..offset + chunk_size],
313 &sequences,
314 &mut rep_offsets,
315 is_last,
316 &mut output,
317 &mut workspace,
318 );
319 } else {
320 block_encoder::encode_compressed_block(
321 &input[offset..offset + chunk_size],
322 &sequences,
323 &mut rep_offsets,
324 is_last,
325 &mut output,
326 &mut workspace,
327 );
328 }
329 offset += chunk_size;
330 }
331 }
332 Strategy::DFast => {
333 let short_size = 1usize << params.chain_log;
334 let long_size = 1usize << params.hash_log;
335 let mut hash_short = vec![0u32; short_size];
336 let mut hash_long = vec![0u32; long_size];
337 dfast::prefill_hash_tables(
338 &combined,
339 plen,
340 params.hash_log,
341 params.chain_log,
342 params.min_match,
343 &mut hash_short,
344 &mut hash_long,
345 );
346 let mut offset = 0;
347 while offset < input.len() {
348 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
349 let is_last = offset + chunk_size >= input.len();
350 dfast::compress_dfast_block(
351 &combined,
352 plen + offset,
353 plen + offset + chunk_size,
354 ¶ms,
355 &rep_offsets,
356 &mut hash_short,
357 &mut hash_long,
358 &mut sequences,
359 );
360 block_encoder::encode_compressed_block(
361 &input[offset..offset + chunk_size],
362 &sequences,
363 &mut rep_offsets,
364 is_last,
365 &mut output,
366 &mut workspace,
367 );
368 offset += chunk_size;
369 }
370 }
371 }
372 }
373 }
374
375 let hash = xxh64(input, 0);
376 let checksum = (hash & 0xFFFF_FFFF) as u32;
377 output.extend_from_slice(&checksum.to_le_bytes());
378
379 Ok(output)
380}
381
382pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
383 let params = strategy::level_params_for_size(level, input.len())
384 .ok_or(CompressError::InvalidLevel(level))?;
385 let mut buf = Vec::with_capacity(output.len());
386 compress_frame(input, ¶ms, &mut buf);
387 if buf.len() > output.len() {
388 return Err(CompressError::OutputTooSmall);
389 }
390 output[..buf.len()].copy_from_slice(&buf);
391 Ok(buf.len())
392}