use super::super::match_table::helpers::common_prefix_len;
use super::table::LdmHashTable;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) struct LdmMatch {
pub(crate) match_pos: usize,
pub(crate) forward_len: usize,
pub(crate) backward_len: usize,
}
impl LdmMatch {
pub(crate) const fn total_len(&self) -> usize {
self.forward_len + self.backward_len
}
}
pub(crate) fn count_backwards_match(
history: &[u8],
p_in_idx: usize,
anchor_idx: usize,
p_match_idx: usize,
match_base_idx: usize,
) -> usize {
debug_assert!(p_in_idx <= history.len());
debug_assert!(p_match_idx <= history.len());
debug_assert!(anchor_idx <= p_in_idx);
debug_assert!(match_base_idx <= p_match_idx);
let mut p_in = p_in_idx;
let mut p_match = p_match_idx;
let mut len = 0usize;
while p_in > anchor_idx && p_match > match_base_idx && history[p_in - 1] == history[p_match - 1]
{
p_in -= 1;
p_match -= 1;
len += 1;
}
len
}
pub(crate) struct FindBestMatchInputs<'a> {
pub(crate) live_history: &'a [u8],
pub(crate) history_abs_start: usize,
pub(crate) split_abs: usize,
pub(crate) anchor_abs: usize,
pub(crate) lowest_index_abs: usize,
pub(crate) iend_abs: usize,
pub(crate) min_match_length: usize,
}
pub(crate) fn find_best_match(
table: &LdmHashTable,
hash_id: u32,
checksum: u32,
inputs: FindBestMatchInputs<'_>,
) -> Option<LdmMatch> {
let FindBestMatchInputs {
live_history,
history_abs_start,
split_abs,
anchor_abs,
lowest_index_abs,
iend_abs,
min_match_length,
} = inputs;
debug_assert!(history_abs_start <= split_abs);
debug_assert!(split_abs <= history_abs_start + live_history.len());
debug_assert!(anchor_abs <= split_abs);
debug_assert!(history_abs_start <= anchor_abs);
debug_assert!(split_abs <= iend_abs);
debug_assert!(iend_abs <= history_abs_start + live_history.len());
let bucket = table.bucket(hash_id);
let mut best: Option<LdmMatch> = None;
let history_abs_end = history_abs_start + live_history.len();
let split_idx = split_abs - history_abs_start;
let split_idx_end = iend_abs - history_abs_start;
for entry in bucket {
if entry.checksum != checksum {
continue;
}
let Some(match_abs) = table.resolve(entry) else {
continue;
};
if match_abs < lowest_index_abs {
continue;
}
if match_abs < history_abs_start || match_abs >= history_abs_end {
continue;
}
if match_abs >= split_abs {
continue;
}
let match_idx = match_abs - history_abs_start;
let forward_len = common_prefix_len(
&live_history[split_idx..split_idx_end],
&live_history[match_idx..],
);
if forward_len < min_match_length {
continue;
}
let backward_len = count_backwards_match(
live_history,
split_idx,
anchor_abs - history_abs_start,
match_idx,
0, );
let candidate = LdmMatch {
match_pos: match_abs,
forward_len,
backward_len,
};
match best {
Some(b) if candidate.total_len() <= b.total_len() => {}
_ => best = Some(candidate),
}
}
best
}
#[cfg(test)]
mod tests {
use super::*;
use crate::encoding::ldm::table::LdmHashTable;
fn fresh_table() -> LdmHashTable {
LdmHashTable::new(4, 2)
}
#[test]
fn count_backwards_match_walks_until_mismatch_or_bound() {
let history = b"abc__abc";
let len = count_backwards_match(history, 8, 0, 3, 0);
assert_eq!(len, 3);
let len2 = count_backwards_match(history, 5, 0, 0, 0);
assert_eq!(len2, 0);
}
#[test]
fn count_backwards_match_respects_anchor_bound() {
let history = b"aaaaaaaa";
let len = count_backwards_match(history, 6, 5, 4, 0);
assert_eq!(len, 1);
}
#[test]
fn find_best_match_returns_none_on_checksum_mismatch() {
let mut table = fresh_table();
table.insert_absolute(1, 4, 0x1111_1111);
let history = b"abcdefghabcdefgh";
let m = find_best_match(
&table,
1,
0xDEAD_BEEF,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 8,
anchor_abs: 0,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
);
assert!(m.is_none(), "wrong checksum must be filtered out");
}
#[test]
fn find_best_match_rejects_stale_entries() {
let mut table = fresh_table();
table.insert_absolute(1, 4, 0xCAFE);
let history = b"abcdefghabcdefgh";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 8,
anchor_abs: 0,
lowest_index_abs: 5,
iend_abs: history.len(),
min_match_length: 4,
},
);
assert!(m.is_none(), "stale entry must be filtered out");
}
#[test]
fn find_best_match_picks_longest_combined_match() {
let mut table = fresh_table();
table.insert_absolute(1, 4, 0xCAFE);
let history = b"PPPPabcdefghabcdefgh";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 12,
anchor_abs: 12,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
)
.expect("a valid candidate must be found");
assert_eq!(m.match_pos, 4);
assert_eq!(m.forward_len, 8);
assert_eq!(m.backward_len, 0);
assert_eq!(m.total_len(), 8);
}
#[test]
fn find_best_match_extends_backwards_into_pre_split_bytes() {
let mut table = fresh_table();
table.insert_absolute(1, 2, 0xCAFE);
let history = b"XYabcdefghXYabcdefgh";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 12,
anchor_abs: 10,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
)
.expect("a valid candidate must be found");
assert_eq!(m.match_pos, 2);
assert_eq!(m.forward_len, 8);
assert_eq!(m.backward_len, 2);
assert_eq!(m.total_len(), 10);
}
#[test]
fn find_best_match_prefers_longer_total_across_slots() {
let mut table = fresh_table();
table.insert_absolute(1, 4, 0xCAFE);
table.insert_absolute(1, 8, 0xCAFE);
let history = b"PPPPabcdabcdefghabcdefgh";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 16,
anchor_abs: 16,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
)
.expect("a valid candidate must be found");
assert_eq!(m.match_pos, 8, "longer-forward winner must be picked");
assert_eq!(m.forward_len, 8);
}
#[test]
fn find_best_match_rejects_entries_at_or_past_split() {
let mut table = fresh_table();
table.insert_absolute(1, 12, 0xCAFE);
let history = b"PPPPabcdefghabcdefgh";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 12,
anchor_abs: 12,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
);
assert!(m.is_none(), "entry at split_abs must be rejected");
let mut table_after = fresh_table();
table_after.insert_absolute(1, 16, 0xCAFE);
let m_after = find_best_match(
&table_after,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 12,
anchor_abs: 12,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
);
assert!(m_after.is_none(), "entry past split_abs must be rejected");
}
#[test]
fn find_best_match_forward_count_is_bounded_by_iend_abs() {
let mut table = fresh_table();
table.insert_absolute(1, 4, 0xCAFE);
let history = b"PPPPabcdefghabcdefgh";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 12,
anchor_abs: 12,
lowest_index_abs: 0,
iend_abs: 16, min_match_length: 4,
},
)
.expect("a 4-byte forward match still passes the min_match floor");
assert_eq!(
m.forward_len, 4,
"forward count must cap at iend_abs - split_abs"
);
}
#[test]
fn find_best_match_filters_short_forward_matches() {
let mut table = fresh_table();
table.insert_absolute(1, 4, 0xCAFE);
let history = b"PPPPabXXXXXXab";
let m = find_best_match(
&table,
1,
0xCAFE,
FindBestMatchInputs {
live_history: history,
history_abs_start: 0,
split_abs: 12,
anchor_abs: 12,
lowest_index_abs: 0,
iend_abs: history.len(),
min_match_length: 4,
},
);
assert!(m.is_none());
}
}