1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(feature = "nightly", feature(optimize_attribute))]
3
4#[cfg(feature = "alloc")]
5extern crate alloc;
6
7pub(crate) mod block_encoder;
8#[cfg(feature = "std")]
9pub mod context;
10pub(crate) mod dfast;
11pub(crate) mod fast;
12pub(crate) mod primitives;
13pub(crate) mod sequences;
14pub mod strategy;
15#[cfg(feature = "std")]
16pub mod streaming;
17
18#[cfg(feature = "alloc")]
19use alloc::vec;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22
23use crate::strategy::Strategy;
24use zrip_core::error::CompressError;
25use zrip_core::frame::{MAX_BLOCK_SIZE, ZSTD_MAGIC};
26use zrip_core::xxhash::xxh64;
27
28pub(crate) fn write_frame_header(output: &mut Vec<u8>, content_size: usize, dict_id: Option<u32>) {
29 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
30
31 let fcs_size = if content_size <= 255 {
32 1
33 } else if content_size <= 0xFFFF + 256 {
34 2
35 } else if content_size <= 0xFFFF_FFFF {
36 4
37 } else {
38 8
39 };
40 let fcs_flag: u8 = match fcs_size {
41 1 => 0,
42 2 => 1,
43 4 => 2,
44 _ => 3,
45 };
46
47 let dict_id_flag: u8 = match dict_id {
48 None => 0,
49 Some(id) if id <= 0xFF => 1,
50 Some(id) if id <= 0xFFFF => 2,
51 Some(_) => 3,
52 };
53
54 let descriptor = 0x20 | 0x04 | (fcs_flag << 6) | dict_id_flag;
55 output.push(descriptor);
56
57 match dict_id {
58 Some(id) if id <= 0xFF => output.push(id as u8),
59 Some(id) if id <= 0xFFFF => output.extend_from_slice(&(id as u16).to_le_bytes()),
60 Some(id) => output.extend_from_slice(&id.to_le_bytes()),
61 None => {}
62 }
63
64 match fcs_size {
65 1 => output.push(content_size as u8),
66 2 => {
67 let v = (content_size - 256) as u16;
68 output.extend_from_slice(&v.to_le_bytes());
69 }
70 4 => output.extend_from_slice(&(content_size as u32).to_le_bytes()),
71 _ => output.extend_from_slice(&(content_size as u64).to_le_bytes()),
72 }
73}
74
/// Cheap heuristic that guesses whether `data` is not worth compressing.
///
/// Only the first 1024 bytes are inspected; shorter inputs always return
/// `false`. The sample is considered incompressible when it contains at least
/// 200 distinct byte values and no single value occurs more than
/// `1024 / 24` (= 42) times.
pub(crate) fn block_looks_incompressible(data: &[u8]) -> bool {
    const SAMPLE: usize = 1024;
    const DISTINCT_THRESHOLD: u32 = 200;
    const MAX_FREQ_DENOM: u32 = 24;

    if data.len() < SAMPLE {
        return false;
    }
    let (sample, _rest) = data.split_at(SAMPLE);

    // Byte histogram of the sample; 1024 samples cannot overflow a u16 bucket.
    let mut histogram = [0u16; 256];
    sample
        .iter()
        .for_each(|&byte| histogram[usize::from(byte)] += 1);

    let distinct = histogram.iter().filter(|&&count| count != 0).count() as u32;
    let peak = histogram.iter().copied().max().unwrap_or(0);
    let peak_limit = SAMPLE as u32 / MAX_FREQ_DENOM;

    distinct >= DISTINCT_THRESHOLD && u32::from(peak) <= peak_limit
}
94
95pub(crate) fn clamp_params_to_src_size(params: &mut strategy::LevelParams, src_len: usize) {
96 if src_len >= 2 {
97 let src_log = 32 - ((src_len as u32) - 1).leading_zeros();
98 params.hash_log = params.hash_log.min(src_log);
99 params.chain_log = params.chain_log.min(src_log);
100 params.window_log = params.window_log.min(src_log);
101 }
102}
103
104pub fn compress_with_params(
105 input: &[u8],
106 params: &strategy::LevelParams,
107) -> Result<Vec<u8>, CompressError> {
108 let mut params = *params;
109 clamp_params_to_src_size(&mut params, input.len());
110 compress_inner(input, ¶ms)
111}
112
113pub fn compress(input: &[u8], level: i32) -> Result<Vec<u8>, CompressError> {
114 let params = strategy::level_params_for_size(level, input.len())
115 .ok_or(CompressError::InvalidLevel(level))?;
116 compress_inner(input, ¶ms)
117}
118
119#[allow(clippy::unnecessary_wraps)]
120fn compress_inner(input: &[u8], params: &strategy::LevelParams) -> Result<Vec<u8>, CompressError> {
121 let mut output = Vec::with_capacity(input.len() + 32);
122 compress_frame(input, params, &mut output);
123 Ok(output)
124}
125
126fn compress_frame(input: &[u8], params: &strategy::LevelParams, output: &mut Vec<u8>) {
127 write_frame_header(output, input.len(), None);
128
129 if input.is_empty() {
130 block_encoder::encode_raw_block(&[], true, output);
131 } else {
132 let hash_size = 1usize << params.hash_log;
133 let mut rep_offsets = [1u32, 4, 8];
134 let mut offset = 0;
135 let mut sequences = Vec::with_capacity(MAX_BLOCK_SIZE / 8);
136 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
137
138 match params.strategy {
139 Strategy::Fast => {
140 let mut hash_table = vec![0u32; hash_size];
141 while offset < input.len() {
142 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
143 let block_end = offset + chunk_size;
144 let is_last = block_end >= input.len();
145
146 if block_looks_incompressible(&input[offset..block_end]) {
147 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
148 } else {
149 fast::compress_fast_block(
150 input,
151 offset,
152 block_end,
153 params,
154 &rep_offsets,
155 &mut hash_table,
156 &mut sequences,
157 );
158 if params.force_raw_literals {
159 block_encoder::encode_compressed_block_raw(
160 &input[offset..block_end],
161 &sequences,
162 &mut rep_offsets,
163 is_last,
164 output,
165 &mut workspace,
166 );
167 } else {
168 block_encoder::encode_compressed_block(
169 &input[offset..block_end],
170 &sequences,
171 &mut rep_offsets,
172 is_last,
173 output,
174 &mut workspace,
175 );
176 }
177 }
178 offset = block_end;
179 }
180 }
181 Strategy::DFast => {
182 let short_size = 1usize << params.chain_log;
183 let long_size = 1usize << params.hash_log;
184 let mut hash_short = vec![0u32; short_size];
185 let mut hash_long = vec![0u32; long_size];
186 while offset < input.len() {
187 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
188 let block_end = offset + chunk_size;
189 let is_last = block_end >= input.len();
190
191 if block_looks_incompressible(&input[offset..block_end]) {
192 block_encoder::encode_raw_block(&input[offset..block_end], is_last, output);
193 } else {
194 dfast::compress_dfast_block(
195 input,
196 offset,
197 block_end,
198 params,
199 &rep_offsets,
200 &mut hash_short,
201 &mut hash_long,
202 &mut sequences,
203 );
204 block_encoder::encode_compressed_block(
205 &input[offset..block_end],
206 &sequences,
207 &mut rep_offsets,
208 is_last,
209 output,
210 &mut workspace,
211 );
212 }
213 offset = block_end;
214 }
215 }
216 }
217 }
218
219 let hash = xxh64(input, 0);
220 let checksum = (hash & 0xFFFF_FFFF) as u32;
221 output.extend_from_slice(&checksum.to_le_bytes());
222}
223
224pub fn compress_with_dict(
225 input: &[u8],
226 level: i32,
227 dict: &zrip_core::dict::Dictionary,
228) -> Result<Vec<u8>, CompressError> {
229 let total_window = dict.content().len() + input.len();
230 let params = strategy::level_params_for_size(level, total_window)
231 .ok_or(CompressError::InvalidLevel(level))?;
232
233 let mut output = Vec::with_capacity(input.len() + 32);
234 write_frame_header(&mut output, input.len(), Some(dict.id()));
235
236 if input.is_empty() {
237 block_encoder::encode_raw_block(&[], true, &mut output);
238 } else {
239 let prefix = dict.content();
240 let mut rep_offsets = *dict.rep_offsets();
241 let mut workspace = block_encoder::BlockEncodeWorkspace::new();
242
243 workspace.prev_ll = dict
244 .ll_table()
245 .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 35));
246 workspace.prev_of = dict
247 .of_table()
248 .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 31));
249 workspace.prev_ml = dict
250 .ml_table()
251 .map(|(dt, al)| block_encoder::FseEncodeTable::from_decode_table(dt, al, 52));
252 workspace.prev_huffman = dict.huf_table().and_then(|(dt, tl)| {
253 zrip_core::huffman::encode::HuffmanEncodeTable::from_decode_table(dt, tl)
254 });
255
256 if input.len() <= MAX_BLOCK_SIZE {
257 let sequences = match params.strategy {
258 Strategy::Fast => {
259 fast::compress_fast_with_prefix(input, ¶ms, &rep_offsets, prefix)
260 }
261 Strategy::DFast => {
262 dfast::compress_dfast_with_prefix(input, ¶ms, &rep_offsets, prefix)
263 }
264 };
265 if params.force_raw_literals {
266 block_encoder::encode_compressed_block_raw(
267 input,
268 &sequences,
269 &mut rep_offsets,
270 true,
271 &mut output,
272 &mut workspace,
273 );
274 } else {
275 block_encoder::encode_compressed_block(
276 input,
277 &sequences,
278 &mut rep_offsets,
279 true,
280 &mut output,
281 &mut workspace,
282 );
283 }
284 } else {
285 let mut combined = Vec::with_capacity(prefix.len() + input.len());
286 combined.extend_from_slice(prefix);
287 combined.extend_from_slice(input);
288 let plen = prefix.len();
289 let hash_size = 1usize << params.hash_log;
290 let mut sequences = Vec::new();
291
292 match params.strategy {
293 Strategy::Fast => {
294 let mut hash_table = vec![0u32; hash_size];
295 fast::prefill_hash_table(&combined, plen, params.hash_log, &mut hash_table);
296 let mut offset = 0;
297 while offset < input.len() {
298 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
299 let is_last = offset + chunk_size >= input.len();
300 fast::compress_fast_block(
301 &combined,
302 plen + offset,
303 plen + offset + chunk_size,
304 ¶ms,
305 &rep_offsets,
306 &mut hash_table,
307 &mut sequences,
308 );
309 if params.force_raw_literals {
310 block_encoder::encode_compressed_block_raw(
311 &input[offset..offset + chunk_size],
312 &sequences,
313 &mut rep_offsets,
314 is_last,
315 &mut output,
316 &mut workspace,
317 );
318 } else {
319 block_encoder::encode_compressed_block(
320 &input[offset..offset + chunk_size],
321 &sequences,
322 &mut rep_offsets,
323 is_last,
324 &mut output,
325 &mut workspace,
326 );
327 }
328 offset += chunk_size;
329 }
330 }
331 Strategy::DFast => {
332 let short_size = 1usize << params.chain_log;
333 let long_size = 1usize << params.hash_log;
334 let mut hash_short = vec![0u32; short_size];
335 let mut hash_long = vec![0u32; long_size];
336 dfast::prefill_hash_tables(
337 &combined,
338 plen,
339 params.hash_log,
340 params.chain_log,
341 params.min_match,
342 &mut hash_short,
343 &mut hash_long,
344 );
345 let mut offset = 0;
346 while offset < input.len() {
347 let chunk_size = (input.len() - offset).min(MAX_BLOCK_SIZE);
348 let is_last = offset + chunk_size >= input.len();
349 dfast::compress_dfast_block(
350 &combined,
351 plen + offset,
352 plen + offset + chunk_size,
353 ¶ms,
354 &rep_offsets,
355 &mut hash_short,
356 &mut hash_long,
357 &mut sequences,
358 );
359 block_encoder::encode_compressed_block(
360 &input[offset..offset + chunk_size],
361 &sequences,
362 &mut rep_offsets,
363 is_last,
364 &mut output,
365 &mut workspace,
366 );
367 offset += chunk_size;
368 }
369 }
370 }
371 }
372 }
373
374 let hash = xxh64(input, 0);
375 let checksum = (hash & 0xFFFF_FFFF) as u32;
376 output.extend_from_slice(&checksum.to_le_bytes());
377
378 Ok(output)
379}
380
381pub fn compress_into(input: &[u8], output: &mut [u8], level: i32) -> Result<usize, CompressError> {
382 let params = strategy::level_params_for_size(level, input.len())
383 .ok_or(CompressError::InvalidLevel(level))?;
384 let mut buf = Vec::with_capacity(output.len());
385 compress_frame(input, ¶ms, &mut buf);
386 if buf.len() > output.len() {
387 return Err(CompressError::OutputTooSmall);
388 }
389 output[..buf.len()].copy_from_slice(&buf);
390 Ok(buf.len())
391}