use alloc::{vec, vec::Vec};
use core::ops::Deref;

use super::{bt4::Bt4, extend_match, hc4::Hc4};
use crate::Write;

// Window moves are aligned down to a multiple of 64 bytes; ANDing with the
// mask rounds an offset down to the nearest MOVE_BLOCK_ALIGN boundary.
const MOVE_BLOCK_ALIGN: i32 = 64;
const MOVE_BLOCK_ALIGN_MASK: i32 = !(MOVE_BLOCK_ALIGN - 1);

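/// Common interface of the match finders: either report all matches at the
/// current read position or skip bytes while still updating internal state.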
pub(crate) trait MatchFind {
    fn find_matches(&mut self, encoder: &mut LzEncoderData, matches: &mut Matches);
    fn skip(&mut self, encoder: &mut LzEncoderData, len: usize);
}

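/// The concrete match finder implementations, dispatched through an enum
/// rather than a boxed trait object so the hot-path calls stay static.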
pub(crate) enum MatchFinders {
    Hc4(Hc4),
    Bt4(Bt4),
}

impl MatchFind for MatchFinders {
    fn find_matches(&mut self, encoder: &mut LzEncoderData, matches: &mut Matches) {
        match self {
            MatchFinders::Hc4(m) => m.find_matches(encoder, matches),
            MatchFinders::Bt4(m) => m.find_matches(encoder, matches),
        }
    }

    fn skip(&mut self, encoder: &mut LzEncoderData, len: usize) {
        match self {
            MatchFinders::Hc4(m) => m.skip(encoder, len),
            MatchFinders::Bt4(m) => m.skip(encoder, len),
        }
    }
}

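/// Match finder selection: `Hc4` (4-byte hash chains) is the faster default,
/// `Bt4` (4-byte hashing with binary trees) finds better matches at a higher
/// cost.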
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub enum MfType {
    #[default]
    Hc4,
    Bt4,
}

impl MfType {
    #[inline]
    fn get_memory_usage(self, dict_size: u32) -> u32 {
        match self {
            MfType::Hc4 => Hc4::get_mem_usage(dict_size),
            MfType::Bt4 => Bt4::get_mem_usage(dict_size),
        }
    }
}

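/// The LZ layer of the encoder: owns the sliding window (`data`), the match
/// buffer, and the selected match finder.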
pub(crate) struct LzEncoder {
    pub(crate) data: LzEncoderData,
    pub(crate) matches: Matches,
    pub(crate) match_finder: MatchFinders,
}

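/// State of the sliding window. `read_pos` is the position most recently
/// handed to the match finder (-1 before any byte is read), `read_limit` the
/// last position that may be read, `write_pos` where the next input byte is
/// stored, and `pending_size` the number of positions deferred by `move_pos`
/// because too little lookahead was buffered.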
pub(crate) struct LzEncoderData {
    pub(crate) keep_size_before: u32,
    pub(crate) keep_size_after: u32,
    pub(crate) match_len_max: u32,
    pub(crate) nice_len: u32,
    pub(crate) buf: Vec<u8>,
    pub(crate) buf_size: usize,
    pub(crate) buf_limit_u16: usize,
    pub(crate) read_pos: i32,
    pub(crate) read_limit: i32,
    pub(crate) finishing: bool,
    pub(crate) write_pos: i32,
    pub(crate) pending_size: u32,
}

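/// Matches found at one position: `len[i]` bytes at distance `dist[i] + 1`
/// (distances are stored off by one), with `count` valid entries.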
pub(crate) struct Matches {
    pub(crate) len: Vec<u32>,
    pub(crate) dist: Vec<i32>,
    pub(crate) count: u32,
}

impl Matches {
    pub(crate) fn new(count_max: usize) -> Self {
        Self {
            len: vec![0; count_max],
            dist: vec![0; count_max],
            count: 0,
        }
    }
}

impl LzEncoder {
    pub(crate) fn get_memory_usage(
        dict_size: u32,
        extra_size_before: u32,
        extra_size_after: u32,
        match_len_max: u32,
        mf: MfType,
    ) -> u32 {
        get_buf_size(
            dict_size,
            extra_size_before,
            extra_size_after,
            match_len_max,
        ) + mf.get_memory_usage(dict_size)
    }

    pub(crate) fn new_hc4(
        dict_size: u32,
        extra_size_before: u32,
        extra_size_after: u32,
        nice_len: u32,
        match_len_max: u32,
        depth_limit: i32,
    ) -> Self {
        Self::new(
            dict_size,
            extra_size_before,
            extra_size_after,
            nice_len,
            match_len_max,
            MatchFinders::Hc4(Hc4::new(dict_size, nice_len, depth_limit)),
        )
    }

    pub(crate) fn new_bt4(
        dict_size: u32,
        extra_size_before: u32,
        extra_size_after: u32,
        nice_len: u32,
        match_len_max: u32,
        depth_limit: i32,
    ) -> Self {
        Self::new(
            dict_size,
            extra_size_before,
            extra_size_after,
            nice_len,
            match_len_max,
            MatchFinders::Bt4(Bt4::new(dict_size, nice_len, depth_limit)),
        )
    }

    fn new(
        dict_size: u32,
        extra_size_before: u32,
        extra_size_after: u32,
        nice_len: u32,
        match_len_max: u32,
        match_finder: MatchFinders,
    ) -> Self {
        let buf_size = get_buf_size(
            dict_size,
            extra_size_before,
            extra_size_after,
            match_len_max,
        );
        let buf_size = buf_size as usize;
        let buf = vec![0; buf_size];
        let buf_limit_u16 = buf_size.checked_sub(size_of::<u16>()).unwrap();

        let keep_size_before = extra_size_before + dict_size;
        let keep_size_after = extra_size_after + match_len_max;

        Self {
            data: LzEncoderData {
                keep_size_before,
                keep_size_after,
                match_len_max,
                nice_len,
                buf,
                buf_size,
                buf_limit_u16,
                read_pos: -1,
                read_limit: -1,
                finishing: false,
                write_pos: 0,
                pending_size: 0,
            },
            matches: Matches::new(nice_len as usize - 1),
            match_finder,
        }
    }

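    /// Rebases all `positions` by subtracting `norm_offset`, clamping the
    /// result at zero. The match finders use this to keep their absolute
    /// positions from growing too large. Dispatches to a SIMD implementation
    /// when the CPU supports one at runtime.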
    pub(crate) fn normalize(positions: &mut [i32], norm_offset: i32) {
        #[cfg(all(feature = "std", feature = "optimization", target_arch = "x86_64"))]
        {
            if std::arch::is_x86_feature_detected!("avx2") {
                return unsafe { normalize_avx2(positions, norm_offset) };
            }
            if std::arch::is_x86_feature_detected!("sse4.1") {
                return unsafe { normalize_sse41(positions, norm_offset) };
            }
        }

        #[cfg(all(feature = "std", feature = "optimization", target_arch = "aarch64"))]
        {
            if std::arch::is_aarch64_feature_detected!("neon") {
                return unsafe { normalize_neon(positions, norm_offset) };
            }
        }

        normalize_scalar(positions, norm_offset);
    }

    pub(crate) fn find_matches(&mut self) {
        self.match_finder
            .find_matches(&mut self.data, &mut self.matches)
    }

    pub(crate) fn matches(&mut self) -> &mut Matches {
        &mut self.matches
    }

    pub(crate) fn skip(&mut self, len: usize) {
        self.match_finder.skip(&mut self.data, len)
    }

    pub(crate) fn set_preset_dict(&mut self, dict_size: u32, preset_dict: &[u8]) {
        self.data
            .set_preset_dict(dict_size, preset_dict, &mut self.match_finder)
    }

    pub(crate) fn set_finishing(&mut self) {
        self.data.set_finishing(&mut self.match_finder)
    }

    pub(crate) fn fill_window(&mut self, input: &[u8]) -> usize {
        self.data.fill_window(input, &mut self.match_finder)
    }

    pub(crate) fn set_flushing(&mut self) {
        self.data.set_flushing(&mut self.match_finder)
    }

    pub(crate) fn verify_matches(&self) -> bool {
        self.data.verify_matches(&self.matches)
    }
}

impl LzEncoderData {
    pub(crate) fn is_started(&self) -> bool {
        self.read_pos != -1
    }

    pub(crate) fn read_buffer(&self) -> &[u8] {
        &self.buf[self.read_pos as usize..]
    }

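    /// Seeds the window with the tail of a preset dictionary and lets the
    /// match finder index those bytes (via `skip`) without emitting matches.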
    fn set_preset_dict(
        &mut self,
        dict_size: u32,
        preset_dict: &[u8],
        match_finder: &mut dyn MatchFind,
    ) {
        debug_assert!(!self.is_started());
        debug_assert_eq!(self.write_pos, 0);
        let copy_size = preset_dict.len().min(dict_size as usize);
        let offset = preset_dict.len() - copy_size;
        self.buf[0..copy_size].copy_from_slice(&preset_dict[offset..(offset + copy_size)]);
        self.write_pos += copy_size as i32;
        match_finder.skip(self, copy_size);
    }

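    /// Slides the window: the `keep_size_before` bytes that may still be
    /// referenced, plus everything not yet read, are moved to the front of
    /// the buffer. The move offset is rounded down to a multiple of
    /// `MOVE_BLOCK_ALIGN` so the copy starts on an aligned boundary.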
    fn move_window(&mut self) {
        let move_offset =
            (self.read_pos + 1 - self.keep_size_before as i32) & MOVE_BLOCK_ALIGN_MASK;
        let move_size = self.write_pos - move_offset;

        debug_assert!(move_size >= 0);
        debug_assert!(move_offset >= 0);

        let move_size = move_size as usize;
        let offset = move_offset as usize;

        self.buf.copy_within(offset..offset + move_size, 0);

        self.read_pos -= move_offset;
        self.read_limit -= move_offset;
        self.write_pos -= move_offset;
    }

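    /// Copies as much of `input` as fits into the window, sliding the window
    /// first if the read head is too close to the end of the buffer. Returns
    /// the number of input bytes consumed.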
    fn fill_window(&mut self, input: &[u8], match_finder: &mut dyn MatchFind) -> usize {
        debug_assert!(!self.finishing);
        if self.read_pos >= (self.buf_size as i32 - self.keep_size_after as i32) {
            self.move_window();
        }
        let len = if input.len() as i32 > self.buf_size as i32 - self.write_pos {
            (self.buf_size as i32 - self.write_pos) as usize
        } else {
            input.len()
        };
        let d_start = self.write_pos as usize;
        let d_end = d_start + len;
        self.buf[d_start..d_end].copy_from_slice(&input[..len]);
        self.write_pos += len as i32;
        if self.write_pos >= self.keep_size_after as i32 {
            self.read_limit = self.write_pos - self.keep_size_after as i32;
        }
        self.process_pending_bytes(match_finder);
        len
    }

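    /// Replays positions that `move_pos` had to defer because too little
    /// lookahead was available, now that more data (or a flush/finish) has
    /// made them readable.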
    fn process_pending_bytes(&mut self, match_finder: &mut dyn MatchFind) {
        if self.pending_size > 0 && self.read_pos < self.read_limit {
            self.read_pos -= self.pending_size as i32;
            let old_pending = self.pending_size;
            self.pending_size = 0;
            match_finder.skip(self, old_pending as _);
            debug_assert!(self.pending_size < old_pending);
        }
    }

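    /// Flushing makes every written byte except the last one readable;
    /// finishing does the same and additionally allows matching with a short
    /// lookahead, since no more input will arrive.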
    fn set_flushing(&mut self, match_finder: &mut dyn MatchFind) {
        self.read_limit = self.write_pos - 1;
        self.process_pending_bytes(match_finder);
    }

    fn set_finishing(&mut self, match_finder: &mut dyn MatchFind) {
        self.read_limit = self.write_pos - 1;
        self.finishing = true;
        self.process_pending_bytes(match_finder);
    }

    pub fn has_enough_data(&self, already_read_len: i32) -> bool {
        self.read_pos - already_read_len < self.read_limit
    }

    pub(crate) fn copy_uncompressed<W: Write>(
        &self,
        out: &mut W,
        backward: i32,
        len: usize,
    ) -> crate::Result<()> {
        let start = (self.read_pos + 1 - backward) as usize;
        out.write_all(&self.buf[start..(start + len)])
    }

    #[inline(always)]
    pub(crate) fn get_avail(&self) -> i32 {
        debug_assert_ne!(self.read_pos, -1);
        self.write_pos - self.read_pos
    }

    #[inline(always)]
    pub(crate) fn get_pos(&self) -> i32 {
        self.read_pos
    }

    #[inline(always)]
    pub(crate) fn get_byte(&self, forward: i32, backward: i32) -> u8 {
        self.buf[(self.read_pos + forward - backward) as usize]
    }

    #[inline(always)]
    pub(crate) fn get_byte_by_pos(&self, pos: i32) -> u8 {
        self.buf[pos as usize]
    }

    #[inline(always)]
    pub(crate) fn get_byte_backward(&self, backward: i32) -> u8 {
        self.buf[(self.read_pos - backward) as usize]
    }

    #[inline(always)]
    pub(crate) fn get_current_byte(&self) -> u8 {
        self.buf[self.read_pos as usize]
    }

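    /// Length of the match between the current position and the position
    /// `dist + 1` bytes back, capped at `len_limit`.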
    #[inline(always)]
    pub(crate) fn get_match_len(&self, dist: i32, len_limit: i32) -> usize {
        extend_match(&self.buf, self.read_pos, 0, dist + 1, len_limit) as usize
    }

    #[inline(always)]
    pub(crate) fn get_match_len2(&self, forward: i32, dist: i32, len_limit: i32) -> u32 {
        if len_limit <= 0 {
            return 0;
        }
        extend_match(&self.buf, self.read_pos + forward, 0, dist + 1, len_limit) as u32
    }

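    /// Like `get_match_len`, but first compares the two leading bytes of both
    /// positions with a single unaligned `u16` load and rejects the candidate
    /// early if they differ, so `extend_match` can start at offset 2. Both
    /// indices are clamped to `buf_limit_u16` so the two-byte reads never run
    /// past the end of the buffer.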
    #[inline(always)]
    pub(crate) fn get_match_len_fast_reject<const MATCH_LEN_MIN: usize>(
        &self,
        dist: i32,
        len_limit: i32,
    ) -> usize {
        let match_dist = dist + 1;
        let read_pos = self.read_pos as usize;

        #[cfg(feature = "optimization")]
        // SAFETY: both indices are clamped to `buf_limit_u16`, which is
        // `buf_size - size_of::<u16>()`, so the unaligned two-byte reads stay
        // within `buf`.
        unsafe {
            let clamped0 = read_pos.min(self.buf_limit_u16);
            let clamped1 = (read_pos - match_dist as usize).min(self.buf_limit_u16);

            if core::ptr::read_unaligned(self.buf.as_ptr().add(clamped0) as *const u16)
                != core::ptr::read_unaligned(self.buf.as_ptr().add(clamped1) as *const u16)
            {
                return 0;
            }
        }
        #[cfg(not(feature = "optimization"))]
        if self.buf[read_pos] != self.buf[read_pos - match_dist as usize]
            || self.buf[read_pos + 1] != self.buf[read_pos + 1 - match_dist as usize]
        {
            return 0;
        }

        extend_match(&self.buf, self.read_pos, 2, match_dist, len_limit) as usize
    }

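    /// Debug helper: re-derives every reported match length with
    /// `extend_match` and checks it against what the match finder returned.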
    fn verify_matches(&self, matches: &Matches) -> bool {
        let len_limit = self.get_avail().min(self.match_len_max as i32);

        for i in 0..matches.count as usize {
            let match_distance = matches.dist[i] + 1;
            let actual_len = extend_match(&self.buf, self.read_pos, 0, match_distance, len_limit);

            if actual_len as u32 != matches.len[i] {
                return false;
            }
        }

        true
    }

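    /// Advances the read position by one. If fewer than
    /// `required_for_flushing` bytes are available (or fewer than
    /// `required_for_finishing` when the stream is finishing), the position
    /// is counted as pending and 0 is returned so the caller waits for more
    /// data.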
    pub(crate) fn move_pos(
        &mut self,
        required_for_flushing: i32,
        required_for_finishing: i32,
    ) -> i32 {
        debug_assert!(required_for_flushing >= required_for_finishing);
        self.read_pos += 1;
        let mut avail = self.write_pos - self.read_pos;
        if avail < required_for_flushing && (avail < required_for_finishing || !self.finishing) {
            self.pending_size += 1;
            avail = 0;
        }
        avail
    }
}

impl Deref for LzEncoder {
    type Target = LzEncoderData;

    fn deref(&self) -> &Self::Target {
        &self.data
    }
}

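/// Size of the window buffer: the dictionary plus the extra lookahead needed
/// before and after the read position, plus a reserve (half the dictionary
/// plus 256 KiB, capped at 512 MiB) so the window has to be moved less often.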
fn get_buf_size(
    dict_size: u32,
    extra_size_before: u32,
    extra_size_after: u32,
    match_len_max: u32,
) -> u32 {
    let keep_size_before = extra_size_before + dict_size;
    let keep_size_after = extra_size_after + match_len_max;
    let reserve_size = (dict_size / 2 + (256 << 10)).min(512 << 20);
    keep_size_before + keep_size_after + reserve_size
}

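/// Portable fallback, also used for the unaligned head/tail elements of the
/// SIMD paths below.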
#[inline(always)]
fn normalize_scalar(positions: &mut [i32], norm_offset: i32) {
    // Clamp before subtracting so positions at or below the offset become 0
    // (a dead entry), exactly like the `max` + `sub` sequence in the SIMD
    // paths. A plain `saturating_sub` would produce negative values here and
    // disagree with the SIMD results.
    positions
        .iter_mut()
        .for_each(|p| *p = (*p).max(norm_offset) - norm_offset);
}

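/// NEON variant: normalizes four positions per 128-bit vector. `align_to_mut`
/// splits off any elements that are not vector-aligned; those go through the
/// scalar fallback.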
#[cfg(all(feature = "std", feature = "optimization", target_arch = "aarch64"))]
#[target_feature(enable = "neon")]
unsafe fn normalize_neon(positions: &mut [i32], norm_offset: i32) {
    use core::arch::aarch64::*;

    let norm_v = vdupq_n_s32(norm_offset);

    let (prefix, chunks, suffix) = positions.align_to_mut::<int32x4_t>();

    normalize_scalar(prefix, norm_offset);

    for chunk in chunks {
        let ptr = chunk as *mut int32x4_t as *mut i32;

        let data = vld1q_s32(ptr);

        let max_val = vmaxq_s32(data, norm_v);
        let result = vsubq_s32(max_val, norm_v);

        vst1q_s32(ptr, result);
    }

    normalize_scalar(suffix, norm_offset);
}

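/// AVX2 variant: normalizes eight positions per 256-bit vector. The aligned
/// load/store intrinsics are sound because `align_to_mut` only yields chunks
/// with `__m256i` alignment.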
#[cfg(all(feature = "std", feature = "optimization", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn normalize_avx2(positions: &mut [i32], norm_offset: i32) {
    use core::arch::x86_64::*;

    let norm_v = _mm256_set1_epi32(norm_offset);

    let (prefix, chunks, suffix) = positions.align_to_mut::<__m256i>();

    normalize_scalar(prefix, norm_offset);

    for chunk in chunks {
        let data = _mm256_load_si256(chunk as *mut _);

        let max_val = _mm256_max_epi32(data, norm_v);
        let result = _mm256_sub_epi32(max_val, norm_v);

        _mm256_store_si256(chunk as *mut _, result);
    }

    normalize_scalar(suffix, norm_offset);
}

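/// SSE4.1 variant: normalizes four positions per 128-bit vector
/// (`_mm_max_epi32` is an SSE4.1 instruction, hence the feature gate).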
#[cfg(all(feature = "std", feature = "optimization", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.1")]
unsafe fn normalize_sse41(positions: &mut [i32], norm_offset: i32) {
    use core::arch::x86_64::*;

    let norm_v = _mm_set1_epi32(norm_offset);

    let (prefix, chunks, suffix) = positions.align_to_mut::<__m128i>();

    normalize_scalar(prefix, norm_offset);

    for chunk in chunks {
        let data = _mm_load_si128(chunk as *mut _);

        let max_val = _mm_max_epi32(data, norm_v);
        let result = _mm_sub_epi32(max_val, norm_v);

        _mm_store_si128(chunk as *mut _, result);
    }

    normalize_scalar(suffix, norm_offset);
}