haagenti_zstd/compress/
repeat_offset_finder.rs1use super::match_finder::{LazyMatchFinder, Match, MAX_MATCH_LENGTH, MIN_MATCH_LENGTH};
23
24const REP_OFFSET_BONUS: usize = 2;
27
28#[derive(Debug)]
33pub struct RepeatOffsetMatchFinder {
34 inner: LazyMatchFinder,
36 rep_offsets: [usize; 3],
39}
40
41impl RepeatOffsetMatchFinder {
42 pub fn new(search_depth: usize) -> Self {
44 Self {
45 inner: LazyMatchFinder::new(search_depth),
46 rep_offsets: [1, 4, 8], }
48 }
49
50 fn reset_rep_offsets(&mut self) {
52 self.rep_offsets = [1, 4, 8];
53 }
54
55 fn update_rep_offsets(&mut self, actual_offset: usize, literal_length: usize) {
60 if literal_length > 0 {
61 if actual_offset == self.rep_offsets[0] {
63 } else if actual_offset == self.rep_offsets[1] {
65 self.rep_offsets.swap(1, 0);
67 } else if actual_offset == self.rep_offsets[2] {
68 let temp = self.rep_offsets[2];
70 self.rep_offsets[2] = self.rep_offsets[1];
71 self.rep_offsets[1] = self.rep_offsets[0];
72 self.rep_offsets[0] = temp;
73 } else {
74 self.rep_offsets[2] = self.rep_offsets[1];
76 self.rep_offsets[1] = self.rep_offsets[0];
77 self.rep_offsets[0] = actual_offset;
78 }
79 } else {
80 if actual_offset == self.rep_offsets[1] {
82 self.rep_offsets.swap(0, 1);
83 } else if actual_offset == self.rep_offsets[2] {
84 let temp = self.rep_offsets[2];
85 self.rep_offsets[2] = self.rep_offsets[1];
86 self.rep_offsets[1] = self.rep_offsets[0];
87 self.rep_offsets[0] = temp;
88 } else if actual_offset == self.rep_offsets[0].saturating_sub(1).max(1) {
89 let new_offset = self.rep_offsets[0].saturating_sub(1).max(1);
90 self.rep_offsets[2] = self.rep_offsets[1];
91 self.rep_offsets[1] = self.rep_offsets[0];
92 self.rep_offsets[0] = new_offset;
93 } else {
94 self.rep_offsets[2] = self.rep_offsets[1];
96 self.rep_offsets[1] = self.rep_offsets[0];
97 self.rep_offsets[0] = actual_offset;
98 }
99 }
100 }
101
102 #[inline]
104 fn rep_offset_index(&self, offset: usize) -> Option<usize> {
105 if offset == self.rep_offsets[0] {
106 Some(0)
107 } else if offset == self.rep_offsets[1] {
108 Some(1)
109 } else if offset == self.rep_offsets[2] {
110 Some(2)
111 } else {
112 None
113 }
114 }
115
116 #[inline]
120 fn probe_offset(&self, input: &[u8], pos: usize, offset: usize) -> usize {
121 if offset == 0 || pos < offset {
122 return 0;
123 }
124
125 let match_pos = pos - offset;
126 let remaining = input.len() - pos;
127 let max_len = remaining.min(MAX_MATCH_LENGTH);
128
129 if max_len < MIN_MATCH_LENGTH {
130 return 0;
131 }
132
133 if pos + 4 <= input.len() && match_pos + 4 <= input.len() {
135 let cur = unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u32) };
136 let prev =
137 unsafe { std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32) };
138
139 if cur != prev {
140 return 0;
141 }
142
143 let mut len = 4;
145 while len < max_len && input[match_pos + len] == input[pos + len] {
146 len += 1;
147 }
148 len
149 } else {
150 let mut len = 0;
152 while len < max_len && input[match_pos + len] == input[pos + len] {
153 len += 1;
154 }
155 if len >= MIN_MATCH_LENGTH {
156 len
157 } else {
158 0
159 }
160 }
161 }
162
163 fn find_best_match_with_rep(
168 &mut self,
169 input: &[u8],
170 pos: usize,
171 _literal_length: usize,
172 ) -> Option<Match> {
173 let mut best_match: Option<Match> = None;
174 let mut best_score: usize = 0;
175
176 for (rep_idx, &rep_offset) in self.rep_offsets.iter().enumerate() {
178 let len = self.probe_offset(input, pos, rep_offset);
179 if len >= MIN_MATCH_LENGTH {
180 let bonus = REP_OFFSET_BONUS + (2 - rep_idx);
183 let score = len + bonus;
184
185 if score > best_score {
186 best_score = score;
187 best_match = Some(Match::new(pos, rep_offset, len));
188 }
189 }
190 }
191
192 if best_score >= MIN_MATCH_LENGTH + REP_OFFSET_BONUS + 8 {
194 return best_match;
195 }
196
197 let hash = if pos + 4 <= input.len() {
200 self.inner.inner.hash4(input, pos)
201 } else if pos + 3 <= input.len() {
202 self.inner.inner.hash3(input, pos)
203 } else {
204 return best_match;
205 };
206
207 if let Some(hash_match) = self.inner.inner.find_best_match(input, pos, hash as usize) {
208 let _hash_score = hash_match.length;
210
211 let hash_is_rep = self.rep_offset_index(hash_match.offset);
213 let hash_score = if hash_is_rep.is_some() {
214 hash_match.length + REP_OFFSET_BONUS
215 } else {
216 hash_match.length
217 };
218
219 if hash_score > best_score {
220 best_match = Some(hash_match);
221 }
222 }
223
224 best_match
225 }
226
227 pub fn find_matches(&mut self, input: &[u8]) -> Vec<Match> {
229 if input.len() < MIN_MATCH_LENGTH {
230 return Vec::new();
231 }
232
233 self.reset_rep_offsets();
234 self.inner.inner.reset(input.len());
235 self.inner.configure_for_size(input.len());
236
237 let mut matches = Vec::with_capacity(input.len() / 16);
238 let mut pos = 0;
239 let end = input.len().saturating_sub(MIN_MATCH_LENGTH);
240 let mut literal_run = 0usize; while pos <= end {
243 if let Some(m) = self.find_best_match_with_rep(input, pos, literal_run) {
244 matches.push(m);
245
246 self.update_rep_offsets(m.offset, literal_run);
248
249 let skip_end = (pos + m.length).min(end);
251 for update_pos in pos..skip_end.min(pos + 8) {
252 if update_pos + 4 <= input.len() {
253 let h = self.inner.inner.hash4(input, update_pos);
254 self.inner.inner.update_hash(input, update_pos, h as usize);
255 }
256 }
257
258 pos += m.length;
259 literal_run = 0;
260 } else {
261 if pos + 4 <= input.len() {
263 let h = self.inner.inner.hash4(input, pos);
264 self.inner.inner.update_hash(input, pos, h as usize);
265 }
266 pos += 1;
267 literal_run += 1;
268 }
269 }
270
271 matches
272 }
273}
274
275#[cfg(test)]
276mod tests {
277 use super::*;
278
279 #[test]
280 fn test_repeat_offset_finder_basic() {
281 let mut finder = RepeatOffsetMatchFinder::new(16);
282
283 let input = b"abcdabcdabcdabcd";
285 let matches = finder.find_matches(input);
286
287 assert!(!matches.is_empty());
289
290 if let Some(m) = matches.first() {
292 assert_eq!(m.offset, 4);
293 }
294 }
295
296 #[test]
297 fn test_repeat_offset_tracking() {
298 let mut finder = RepeatOffsetMatchFinder::new(16);
299
300 assert_eq!(finder.rep_offsets, [1, 4, 8]);
302
303 finder.update_rep_offsets(100, 5);
305 assert_eq!(finder.rep_offsets, [100, 1, 4]);
306
307 finder.update_rep_offsets(100, 3);
309 assert_eq!(finder.rep_offsets, [100, 1, 4]);
310
311 finder.update_rep_offsets(1, 2);
313 assert_eq!(finder.rep_offsets, [1, 100, 4]);
314 }
315
316 #[test]
317 fn test_rep_offset_bonus() {
318 let mut finder = RepeatOffsetMatchFinder::new(16);
319
320 let input = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
322 let matches = finder.find_matches(input);
323
324 if let Some(m) = matches.first() {
326 assert_eq!(m.offset, 1);
327 assert!(m.length > 3);
328 }
329 }
330}