use crate::{hasher::*, hashmap::BasicHashTable,Ranger};
struct InnerConfig{
l_step:usize,
src_len:usize,
trgt_len:usize,
src_win_size:usize,
max_end_pos:usize,
}
pub(crate) struct SrcMatcher{
pub(crate) l_step:usize,
pub(crate) fwd_hash: usize,
pub(crate) fwd_pos: usize,
pub(crate) max_fwd_hash_pos:usize,
table: BasicHashTable,
src_len:usize,
trgt_len:usize,
half_win_size:usize,
max_end_pos:usize,
cur_window_end:usize,
pub(crate) next_hash_pos:usize,
max_match_pos:usize,
}
impl SrcMatcher{
pub fn find_best_src_match(&mut self,src:&[u8],trgt:&[u8])->Option<(usize,usize,usize)>{
let table_pos = self.table.get(self.fwd_hash)?;
let src_pos = self.table_to_abs_pos(table_pos);
if let Some((pre_match,post_match)) = extend_src_match(src, src_pos, trgt, self.fwd_pos) {
let total_post_match = post_match + 9;
return Some((src_pos,pre_match,total_post_match));
}
None
}
pub(crate) fn table_to_abs_pos(&self, table_pos:usize)->usize{
table_pos * self.l_step
}
fn abs_to_table_pos(&self, abs_pos:usize)->usize{
debug_assert!(abs_pos % self.l_step == 0, "abs_pos({}) is !divisible by l_step({})",abs_pos,self.l_step);
abs_pos / self.l_step
}
fn store(&mut self, hash:usize, abs_pos:usize){
debug_assert!(abs_pos % self.l_step == 0);
let table_pos = self.abs_to_table_pos(abs_pos);
match self.table.insert(hash, table_pos){
Ok(None) => {},
Ok(Some(_prev)) => {
},
Err((_old_hash,_prev_pos)) => {
}
}
}
}
pub(crate) fn add_start_positions_to_matcher(matcher: &mut SrcMatcher, cur_o_pos: usize, src: &[u8]) {
if cur_o_pos < matcher.next_hash_pos{
return;
}
if let Some(range) = calculate_window_range(cur_o_pos, matcher.src_len, matcher.trgt_len, matcher.half_win_size, matcher.max_end_pos, matcher.cur_window_end, matcher.l_step,matcher.max_match_pos) {
debug_assert!(range.start % matcher.l_step == 0, "range.start({}) must be divisible by l_step({})",range.start,matcher.l_step);
if range.end >= matcher.max_end_pos{
matcher.next_hash_pos = usize::MAX;
}else{
matcher.next_hash_pos = cur_o_pos + (matcher.half_win_size);
}
if matcher.l_step >= 9 {
for pos in range.step_by(matcher.l_step).rev() {
let hash = calculate_large_checksum(&src[pos..pos + 9]);
matcher.store(hash, pos)
}
}else{
let aligned_last_hash = align(range.end-9,matcher.l_step);
let mut hash = calculate_large_checksum(&src[aligned_last_hash..range.end]);
for pos in (range.start..aligned_last_hash).rev() {
hash = update_large_checksum_bwd(hash, src[pos+9], src[pos]);
if pos % matcher.l_step == 0 {
matcher.store(hash, pos);
}
}
}
}
}
const DEFAULT_SRC_WIN_SIZE: usize = 1 << 26;
const MIN_SRC_WIN_SIZE: usize = 1 << 20;
const _HASH_CHUNK_SIZE: usize = 1 << 23;
#[derive(Debug, Clone)]
pub struct SrcMatcherConfig{
pub l_step: usize,
pub max_src_win_size: Option<usize>,
}
impl Default for SrcMatcherConfig {
fn default() -> Self {
Self::comp_level(3)
}
}
impl SrcMatcherConfig {
pub fn new(l_step: usize,max_src_win_size:Option<usize>) -> Self {
Self { l_step, max_src_win_size}
}
pub fn comp_level(level:usize)->Self{
assert!(level <= 9);
let l_step = Ranger::new(0..10, 26..=2).map(level);
Self { l_step, max_src_win_size: None}
}
fn make_inner_config(&mut self, src_len: usize,trgt_len:usize)->InnerConfig{
self.l_step = self.l_step.max(1);
self.max_src_win_size = Some(self
.max_src_win_size
.map(|s| s.next_power_of_two().max(MIN_SRC_WIN_SIZE))
.unwrap_or(
DEFAULT_SRC_WIN_SIZE
));
InnerConfig{
l_step:self.l_step,
src_len,
trgt_len,
src_win_size:self.max_src_win_size.unwrap(),
max_end_pos:align(src_len-9,self.l_step),
}
}
pub(crate) fn build(&mut self,src:&[u8],trgt_start_pos:usize,trgt:&[u8])->SrcMatcher{
let trgt_len = trgt.len();
let InnerConfig { l_step, src_len, trgt_len, src_win_size, max_end_pos } = self.make_inner_config(src.len(),trgt_len);
let max_fwd_hash_pos = trgt.len() - 9;
let (fwd_hash,fwd_pos) = if trgt_start_pos < max_fwd_hash_pos {
(calculate_large_checksum(&trgt[trgt_start_pos..trgt_start_pos+9]),trgt_start_pos)
}else{
(0,max_fwd_hash_pos)
};
let table_win_effective = (src_len.next_power_of_two() >> 1).min(src_win_size);
let table = BasicHashTable::new(table_win_effective/l_step, false);
let mut matcher = SrcMatcher{
table, src_len, trgt_len, max_end_pos,max_fwd_hash_pos,
fwd_hash,
fwd_pos,
l_step,
half_win_size: src_win_size>>1,
cur_window_end: 0,
next_hash_pos: 0,
max_match_pos: trgt_start_pos,
};
add_start_positions_to_matcher(&mut matcher, trgt_start_pos, src);
matcher
}
}
#[inline]
fn align(pos:usize,l_step:usize)->usize{
pos - (pos % l_step)
}
pub(crate) fn extend_src_match(src:&[u8],src_start:usize,trgt:&[u8],trgt_start:usize)->Option<(usize,usize)>{
let initial_match = src[src_start..src_start + 9]
.iter().zip(trgt[trgt_start..trgt_start + 9].iter())
.all(|(a,b)| a == b);
if !initial_match{
return None;
}
let min_offset = src_start.min(trgt_start);
let pre_match = if min_offset > 1 {
(1..min_offset).take_while(|&i| {
src[src_start - i] == trgt[trgt_start - i]
}).count()
}else{0};
let src_end = src_start + 9;
let trgt_end = trgt_start + 9;
let src_remain = src.len() - src_end;
let trgt_remain = trgt.len() - trgt_end;
let post_match = (0..src_remain.min(trgt_remain)).take_while(|&i| {
src[src_end + i] == trgt[trgt_end + i]
}).count();
Some((pre_match,post_match))
}
#[allow(dead_code)]
fn calculate_default_win_size(src_len: usize, trgt_len: usize,max_win_size:Option<usize>) -> usize {
let mut win_size = (src_len).abs_diff(trgt_len).next_power_of_two();
if win_size <= MIN_SRC_WIN_SIZE {
win_size = win_size + MIN_SRC_WIN_SIZE;
}
let upper_bound = src_len.next_power_of_two().min(max_win_size.map(|a|a.next_power_of_two()).unwrap_or(DEFAULT_SRC_WIN_SIZE));
win_size.min(upper_bound)
}
#[inline]
fn calculate_window_range(
cur_o_pos: usize,
src_len: usize,
trgt_len: usize,
half_win_size: usize,
max_end_pos: usize,
cur_window_end: usize,
l_step: usize,
max_match_end:usize,
) -> Option<std::ops::Range<usize>> {
if cur_o_pos < half_win_size {
return Some(0..(half_win_size<<1).min(max_end_pos));
}else if cur_window_end >= max_end_pos{
return None;
}
let scaled_src_pos = cur_o_pos * src_len / trgt_len;
let scaled_end = align((scaled_src_pos + half_win_size).min(max_end_pos),l_step);
if scaled_end <= cur_window_end || scaled_end <= max_match_end {return None;
}
let max_diff_start = scaled_end.saturating_sub(half_win_size<<1);
let scaled_start = align(cur_window_end.max(max_match_end).max(max_diff_start),l_step);
Some(scaled_start..scaled_end)
}
#[cfg(test)]
mod test_super {
use super::*;
#[test]
fn test_calculate_window_range_lrg_trgt() {
let half_win_size = 256;
let l_step = 2;
let src_len = 1000;
let trgt_len = 2000;
let max_end_pos = 994;
let result = calculate_window_range(10, src_len, trgt_len, half_win_size, max_end_pos, 0, l_step, 0);
assert_eq!(result.unwrap(), 0..512, "Initial fast forward: Incorrect range");
let result = calculate_window_range(10, src_len, trgt_len, 512, max_end_pos, 0, l_step, 0);
assert!(result.is_some(), "Initial fast forward: Expected Some(0..1024), but got {:?}", result);
assert_eq!(result.unwrap(), 0..max_end_pos, "Initial fast forward: Incorrect range");
let result = calculate_window_range(900, src_len, trgt_len, half_win_size, max_end_pos, 1000, l_step, 0);
assert!(result.is_none(), "End of file: Expected None, but got {:?}", result);
let result = calculate_window_range(500, src_len, trgt_len, half_win_size, max_end_pos, 700, l_step, 0);
assert!(result.is_none(), "Nothing to hash: Expected None, but got {:?}", result);
let result = calculate_window_range(1250, src_len, trgt_len, half_win_size, max_end_pos, 512, l_step, 0);
assert_eq!(result.unwrap(), 512..880, "Normal advancement: Incorrect range");
let result = calculate_window_range(400, src_len, trgt_len, half_win_size, max_end_pos, 300, l_step, 1000);
assert!(result.is_none(), "Scaled end too far: Expected None, but got {:?}", result);
let result = calculate_window_range(400, src_len, trgt_len, half_win_size, max_end_pos, 300, l_step, 700);
assert!(result.is_none(), "Max Match Exceeds Position: Expected None, but got {:?}", result);
}
#[test]
fn test_calculate_default_win_size_various_conditions() {
let src_len = 2_000_000;
let trgt_len = 100;
assert_eq!(calculate_default_win_size(src_len, trgt_len,None), src_len.next_power_of_two(), "test_large_source_small_target");
let src_len = 100;
let trgt_len = 2_000_000;
assert_eq!(calculate_default_win_size(src_len, trgt_len,None), src_len.next_power_of_two(), "test_small_source_large_target");
let src_len = 100_000_000;
let trgt_len = 90_000_000;
assert_eq!(calculate_default_win_size(src_len, trgt_len,None), 10_000_000usize.next_power_of_two(), "test_different_lengths");
let src_len = 100_000_000;
let trgt_len = (src_len - MIN_SRC_WIN_SIZE) + 10;
assert_eq!(calculate_default_win_size(src_len, trgt_len,None), (src_len - trgt_len).next_power_of_two() + MIN_SRC_WIN_SIZE, "test_diff_below_min_src_win_size");
let src_len = 100_000_000;
let trgt_len = 25_000_000;
assert_eq!(calculate_default_win_size(src_len, trgt_len,None), DEFAULT_SRC_WIN_SIZE, "test_source_below_min_src_win_size");
}
}