use super::match_finder::{LazyMatchFinder, Match, MAX_MATCH_LENGTH, MIN_MATCH_LENGTH};
const REP_OFFSET_BONUS: usize = 2;
#[derive(Debug)]
pub struct RepeatOffsetMatchFinder {
inner: LazyMatchFinder,
rep_offsets: [usize; 3],
}
impl RepeatOffsetMatchFinder {
pub fn new(search_depth: usize) -> Self {
Self {
inner: LazyMatchFinder::new(search_depth),
rep_offsets: [1, 4, 8], }
}
fn reset_rep_offsets(&mut self) {
self.rep_offsets = [1, 4, 8];
}
fn update_rep_offsets(&mut self, actual_offset: usize, literal_length: usize) {
if literal_length > 0 {
if actual_offset == self.rep_offsets[0] {
} else if actual_offset == self.rep_offsets[1] {
self.rep_offsets.swap(1, 0);
} else if actual_offset == self.rep_offsets[2] {
let temp = self.rep_offsets[2];
self.rep_offsets[2] = self.rep_offsets[1];
self.rep_offsets[1] = self.rep_offsets[0];
self.rep_offsets[0] = temp;
} else {
self.rep_offsets[2] = self.rep_offsets[1];
self.rep_offsets[1] = self.rep_offsets[0];
self.rep_offsets[0] = actual_offset;
}
} else {
if actual_offset == self.rep_offsets[1] {
self.rep_offsets.swap(0, 1);
} else if actual_offset == self.rep_offsets[2] {
let temp = self.rep_offsets[2];
self.rep_offsets[2] = self.rep_offsets[1];
self.rep_offsets[1] = self.rep_offsets[0];
self.rep_offsets[0] = temp;
} else if actual_offset == self.rep_offsets[0].saturating_sub(1).max(1) {
let new_offset = self.rep_offsets[0].saturating_sub(1).max(1);
self.rep_offsets[2] = self.rep_offsets[1];
self.rep_offsets[1] = self.rep_offsets[0];
self.rep_offsets[0] = new_offset;
} else {
self.rep_offsets[2] = self.rep_offsets[1];
self.rep_offsets[1] = self.rep_offsets[0];
self.rep_offsets[0] = actual_offset;
}
}
}
#[inline]
fn rep_offset_index(&self, offset: usize) -> Option<usize> {
if offset == self.rep_offsets[0] {
Some(0)
} else if offset == self.rep_offsets[1] {
Some(1)
} else if offset == self.rep_offsets[2] {
Some(2)
} else {
None
}
}
#[inline]
fn probe_offset(&self, input: &[u8], pos: usize, offset: usize) -> usize {
if offset == 0 || pos < offset {
return 0;
}
let match_pos = pos - offset;
let remaining = input.len() - pos;
let max_len = remaining.min(MAX_MATCH_LENGTH);
if max_len < MIN_MATCH_LENGTH {
return 0;
}
if pos + 4 <= input.len() && match_pos + 4 <= input.len() {
let cur = unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u32) };
let prev =
unsafe { std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32) };
if cur != prev {
return 0;
}
let mut len = 4;
while len < max_len && input[match_pos + len] == input[pos + len] {
len += 1;
}
len
} else {
let mut len = 0;
while len < max_len && input[match_pos + len] == input[pos + len] {
len += 1;
}
if len >= MIN_MATCH_LENGTH {
len
} else {
0
}
}
}
fn find_best_match_with_rep(
&mut self,
input: &[u8],
pos: usize,
_literal_length: usize,
) -> Option<Match> {
let mut best_match: Option<Match> = None;
let mut best_score: usize = 0;
for (rep_idx, &rep_offset) in self.rep_offsets.iter().enumerate() {
let len = self.probe_offset(input, pos, rep_offset);
if len >= MIN_MATCH_LENGTH {
let bonus = REP_OFFSET_BONUS + (2 - rep_idx);
let score = len + bonus;
if score > best_score {
best_score = score;
best_match = Some(Match::new(pos, rep_offset, len));
}
}
}
if best_score >= MIN_MATCH_LENGTH + REP_OFFSET_BONUS + 8 {
return best_match;
}
let hash = if pos + 4 <= input.len() {
self.inner.inner.hash4(input, pos)
} else if pos + 3 <= input.len() {
self.inner.inner.hash3(input, pos)
} else {
return best_match;
};
if let Some(hash_match) = self.inner.inner.find_best_match(input, pos, hash as usize) {
let _hash_score = hash_match.length;
let hash_is_rep = self.rep_offset_index(hash_match.offset);
let hash_score = if hash_is_rep.is_some() {
hash_match.length + REP_OFFSET_BONUS
} else {
hash_match.length
};
if hash_score > best_score {
best_match = Some(hash_match);
}
}
best_match
}
pub fn find_matches(&mut self, input: &[u8]) -> Vec<Match> {
if input.len() < MIN_MATCH_LENGTH {
return Vec::new();
}
self.reset_rep_offsets();
self.inner.inner.reset(input.len());
self.inner.configure_for_size(input.len());
let mut matches = Vec::with_capacity(input.len() / 16);
let mut pos = 0;
let end = input.len().saturating_sub(MIN_MATCH_LENGTH);
let mut literal_run = 0usize;
while pos <= end {
if let Some(m) = self.find_best_match_with_rep(input, pos, literal_run) {
matches.push(m);
self.update_rep_offsets(m.offset, literal_run);
let skip_end = (pos + m.length).min(end);
for update_pos in pos..skip_end.min(pos + 8) {
if update_pos + 4 <= input.len() {
let h = self.inner.inner.hash4(input, update_pos);
self.inner.inner.update_hash(input, update_pos, h as usize);
}
}
pos += m.length;
literal_run = 0;
} else {
if pos + 4 <= input.len() {
let h = self.inner.inner.hash4(input, pos);
self.inner.inner.update_hash(input, pos, h as usize);
}
pos += 1;
literal_run += 1;
}
}
matches
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_repeat_offset_finder_basic() {
let mut finder = RepeatOffsetMatchFinder::new(16);
let input = b"abcdabcdabcdabcd";
let matches = finder.find_matches(input);
assert!(!matches.is_empty());
if let Some(m) = matches.first() {
assert_eq!(m.offset, 4);
}
}
#[test]
fn test_repeat_offset_tracking() {
let mut finder = RepeatOffsetMatchFinder::new(16);
assert_eq!(finder.rep_offsets, [1, 4, 8]);
finder.update_rep_offsets(100, 5);
assert_eq!(finder.rep_offsets, [100, 1, 4]);
finder.update_rep_offsets(100, 3);
assert_eq!(finder.rep_offsets, [100, 1, 4]);
finder.update_rep_offsets(1, 2);
assert_eq!(finder.rep_offsets, [1, 100, 4]);
}
#[test]
fn test_rep_offset_bonus() {
let mut finder = RepeatOffsetMatchFinder::new(16);
let input = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
let matches = finder.find_matches(input);
if let Some(m) = matches.first() {
assert_eq!(m.offset, 1);
assert!(m.length > 3);
}
}
}