prs_rs/impls/comp/
lz77_matcher.rs1use super::comp_dict::CompDict;
2use crate::prelude::Allocator;
3use core::mem::size_of;
4use core::ptr::read_unaligned;
5
6pub trait Lz77Parameters {
12 const MAX_OFFSET: usize;
15 const MAX_LENGTH: usize;
17}
18
19#[inline(never)] pub unsafe fn lz77_get_longest_match_fast<
35 P: Lz77Parameters,
36 L: Allocator + Copy,
37 S: Allocator + Copy,
38>(
39 dict: &mut CompDict<L, S>,
40 source_ptr: *const u8,
41 source_index: usize,
42) -> Lz77Match {
43 let mut best_match = Lz77Match {
44 offset: 0,
45 length: 0,
46 };
47
48 let min_offset = source_index.saturating_sub(P::MAX_OFFSET);
50
51 let key = read_unaligned(source_ptr.add(source_index) as *const u16);
53
54 let offsets = dict.get_item(key, min_offset, source_index.saturating_sub(1));
56 for &match_offset in offsets.iter().rev() {
57 let match_offset = match_offset as usize;
58
59 let mut match_length = 2;
61
62 let offset_src_ptr = source_ptr.add(match_offset);
64 let offset_dst_ptr = source_ptr.add(source_index);
65 let initial_match = read_unaligned(offset_src_ptr as *const u32)
66 == read_unaligned(offset_dst_ptr as *const u32);
67
68 if !initial_match {
69 match_length +=
71 (*offset_src_ptr.add(match_length) == *offset_dst_ptr.add(match_length)) as usize;
72 } else {
73 match_length = 4;
74
75 debug_assert!(P::MAX_LENGTH % size_of::<usize>() == 0);
78
79 while match_length < (P::MAX_LENGTH - size_of::<u32>())
81 && read_unaligned(offset_src_ptr.add(match_length) as *const usize)
82 == read_unaligned(offset_dst_ptr.add(match_length) as *const usize)
83 {
84 match_length += size_of::<usize>();
85 }
86
87 while match_length < P::MAX_LENGTH
88 && *offset_src_ptr.add(match_length) == *offset_dst_ptr.add(match_length)
89 {
90 match_length += 1;
91 }
92 }
93
94 if match_length > best_match.length {
96 best_match.length = match_length;
97 best_match.offset = match_offset as isize - source_index as isize;
98
99 if match_length == P::MAX_LENGTH {
100 break;
101 }
102 }
103 }
104
105 best_match
106}
107
108#[inline(never)] pub unsafe fn lz77_get_longest_match_slow<
123 P: Lz77Parameters,
124 L: Allocator + Copy,
125 S: Allocator + Copy,
126>(
127 dict: &mut CompDict<L, S>,
128 source_ptr: *const u8,
129 source_len: usize,
130 source_index: usize,
131) -> Lz77Match {
132 let mut best_match = Lz77Match {
133 offset: 0,
134 length: 0,
135 };
136
137 let min_offset = source_index.saturating_sub(P::MAX_OFFSET);
139
140 let key = read_unaligned(source_ptr.add(source_index) as *const u16);
142
143 let max_match_length = P::MAX_LENGTH.min(source_len - source_index);
145
146 let offsets = dict.get_item(key, min_offset, source_index.saturating_sub(1));
148 for &match_offset in offsets.iter().rev() {
149 let match_offset = match_offset as usize;
150
151 let mut match_length = 2;
153 let offset_src_ptr = source_ptr.add(match_offset);
154 let offset_dst_ptr = source_ptr.add(source_index);
155 while match_length < max_match_length
156 && *offset_src_ptr.add(match_length) == *offset_dst_ptr.add(match_length)
157 {
158 match_length += 1;
159 }
160
161 if match_length > best_match.length {
163 best_match.length = match_length;
164 best_match.offset = match_offset as isize - source_index as isize;
165
166 if match_length == max_match_length {
167 break;
168 }
169 }
170 }
171
172 best_match
173}
174
175pub struct Lz77Match {
177 pub offset: isize,
179 pub length: usize,
181}
182
183#[cfg(test)]
184mod tests {
185 use super::*;
186 use crate::test_prelude::*;
187
188 #[test]
189 fn test_longest_match_with_repetition() {
190 let data = b"abcabcabcabcabc";
191 let mut dict = CompDict::new(data.len());
192 unsafe { dict.init(data, 0) }
193
194 let match_result = unsafe {
196 lz77_get_longest_match_slow::<CompressParameters, Global, Global>(
197 &mut dict,
198 data.as_ptr(),
199 data.len(),
200 3,
201 )
202 };
203 assert_eq!(match_result.length, 12);
204 assert_eq!(match_result.offset, -3);
205 }
206
207 #[test]
208 fn test_no_match() {
209 let data = b"abcdefgh";
210 let mut dict = CompDict::new(data.len());
211 unsafe { dict.init(data, 0) }
212
213 let match_result = unsafe {
215 lz77_get_longest_match_slow::<CompressParameters, Global, Global>(
216 &mut dict,
217 data.as_ptr(),
218 data.len(),
219 2,
220 )
221 };
222 assert_eq!(match_result.length, 0);
223 }
224
225 #[test]
226 fn test_multiple_matches() {
227 let data = b"ababababab";
228 let mut dict = CompDict::new(data.len());
229 unsafe { dict.init(data, 0) }
230
231 let match_result = unsafe {
233 lz77_get_longest_match_slow::<CompressParameters, Global, Global>(
234 &mut dict,
235 data.as_ptr(),
236 data.len(),
237 2,
238 )
239 };
240 assert_eq!(match_result.length, 8);
241 assert_eq!(match_result.offset, -2);
242 }
243
244 #[test]
245 fn test_boundary_conditions() {
246 let data = b"ababababab";
247 let mut dict = CompDict::new(data.len());
248 unsafe { dict.init(data, 0) }
249
250 let match_result = unsafe {
252 lz77_get_longest_match_slow::<CompressParameters, Global, Global>(
253 &mut dict,
254 data.as_ptr(),
255 data.len(),
256 data.len() - 3,
257 )
258 };
259 assert_eq!(match_result.length, 3);
260 assert_eq!(match_result.offset, -2);
261 }
262
263 #[test]
264 fn test_last_match_on_boundary() {
265 let data = b"acacacabab";
266 let mut dict = CompDict::new(data.len());
267 unsafe { dict.init(data, 0) }
268
269 let match_result = unsafe {
271 lz77_get_longest_match_slow::<CompressParameters, Global, Global>(
272 &mut dict,
273 data.as_ptr(),
274 data.len(),
275 data.len() - 2,
276 )
277 };
278 assert_eq!(match_result.length, 2);
279 assert_eq!(match_result.offset, -2);
280 }
281
282 struct CompressParameters;
283 impl Lz77Parameters for CompressParameters {
284 const MAX_OFFSET: usize = 0x1FFF;
285 const MAX_LENGTH: usize = 256;
286 }
287}