1
2use crate::{hasher::*, hashmap::BasicHashTable,Ranger};
3
4struct InnerConfig{
5 l_step:usize,
6 src_len:usize,
7 trgt_len:usize,
8 src_win_size:usize,
9 max_end_pos:usize,
10}
11
12pub(crate) struct SrcMatcher{
13 pub(crate) l_step:usize,
14 pub(crate) fwd_hash: usize,
15 pub(crate) fwd_pos: usize,
16 pub(crate) max_fwd_hash_pos:usize,
17 table: BasicHashTable,
18 src_len:usize,
20 trgt_len:usize,
21 half_win_size:usize,
22 max_end_pos:usize,
23 cur_window_end:usize,
24 pub(crate) next_hash_pos:usize,
25 max_match_pos:usize,
26}
27
28impl SrcMatcher{
29
30 pub fn find_best_src_match(&mut self,src:&[u8],trgt:&[u8])->Option<(usize,usize,usize)>{
32 let table_pos = self.table.get(self.fwd_hash)?;
33 let src_pos = self.table_to_abs_pos(table_pos);
34 if let Some((pre_match,post_match)) = extend_src_match(src, src_pos, trgt, self.fwd_pos) {
35 let total_post_match = post_match + 9;
36 return Some((src_pos,pre_match,total_post_match));
37 }
38 None
39 }
40 pub(crate) fn table_to_abs_pos(&self, table_pos:usize)->usize{
43 table_pos * self.l_step
44 }
45 fn abs_to_table_pos(&self, abs_pos:usize)->usize{
46 debug_assert!(abs_pos % self.l_step == 0, "abs_pos({}) is !divisible by l_step({})",abs_pos,self.l_step);
47 abs_pos / self.l_step
48 }
49 fn store(&mut self, hash:usize, abs_pos:usize){
50 debug_assert!(abs_pos % self.l_step == 0);
51 let table_pos = self.abs_to_table_pos(abs_pos);
52 match self.table.insert(hash, table_pos){
53 Ok(None) => {},
54 Ok(Some(_prev)) => {
55 },
57 Err((_old_hash,_prev_pos)) => {
58 }
60 }
61 }
62}
63
64pub(crate) fn add_start_positions_to_matcher(matcher: &mut SrcMatcher, cur_o_pos: usize, src: &[u8]) {
69 if cur_o_pos < matcher.next_hash_pos{
70 return;
71 }
72 if let Some(range) = calculate_window_range(cur_o_pos, matcher.src_len, matcher.trgt_len, matcher.half_win_size, matcher.max_end_pos, matcher.cur_window_end, matcher.l_step,matcher.max_match_pos) {
73 debug_assert!(range.start % matcher.l_step == 0, "range.start({}) must be divisible by l_step({})",range.start,matcher.l_step);
82 if range.end >= matcher.max_end_pos{
83 matcher.next_hash_pos = usize::MAX;
84 }else{
85 matcher.next_hash_pos = cur_o_pos + (matcher.half_win_size);
90 }
91
92 if matcher.l_step >= 9 {
93 for pos in range.step_by(matcher.l_step).rev() {
94 let hash = calculate_large_checksum(&src[pos..pos + 9]);
95 matcher.store(hash, pos)
96 }
97 }else{
98 let aligned_last_hash = align(range.end.saturating_sub(9),matcher.l_step);
99 let mut hash = calculate_large_checksum(&src[aligned_last_hash..range.end]);
100 for pos in (range.start..aligned_last_hash).rev() {
101 hash = update_large_checksum_bwd(hash, src[pos+9], src[pos]);
102 if pos % matcher.l_step == 0 {
103 matcher.store(hash, pos);
104 }
105 }
106 }
115 }
116}
117const DEFAULT_SRC_WIN_SIZE: usize = 1 << 26;
118const MIN_SRC_WIN_SIZE: usize = 1 << 20;
119const _HASH_CHUNK_SIZE: usize = 1 << 23;
120#[derive(Debug, Clone)]
124pub struct SrcMatcherConfig{
125 pub l_step: usize,
128 pub max_src_win_size: Option<usize>,
133}
134
135impl Default for SrcMatcherConfig {
136 fn default() -> Self {
137 Self::comp_level(3)
138 }
139}
140impl SrcMatcherConfig {
141 pub fn new(l_step: usize,max_src_win_size:Option<usize>) -> Self {
145 Self { l_step, max_src_win_size}
146 }
147 pub fn comp_level(level:usize)->Self{
151 assert!(level <= 9);
152 let l_step = Ranger::new(0..10, 26..=2).map(level);
153 Self { l_step, max_src_win_size: None}
154 }
155 fn make_inner_config(&mut self, src_len: usize,trgt_len:usize)->InnerConfig{
156 self.l_step = self.l_step.max(1);
157
158 self.max_src_win_size = Some(self
159 .max_src_win_size
160 .map(|s| s.next_power_of_two().max(MIN_SRC_WIN_SIZE))
161 .unwrap_or(
162 DEFAULT_SRC_WIN_SIZE
163 ));
165 InnerConfig{
166 l_step:self.l_step,
167 src_len,
168 trgt_len,
169 src_win_size:self.max_src_win_size.unwrap(),
170 max_end_pos:align(src_len.saturating_sub(9),self.l_step),
171 }
172 }
173 pub(crate) fn build(&mut self,src:&[u8],trgt_start_pos:usize,trgt:&[u8])->SrcMatcher{
174 let trgt_len = trgt.len();
175 let InnerConfig { l_step, src_len, trgt_len, src_win_size, max_end_pos } = self.make_inner_config(src.len(),trgt_len);
176 let max_fwd_hash_pos = trgt.len().saturating_sub(9);
177 let (fwd_hash,fwd_pos) = if trgt_start_pos < max_fwd_hash_pos {
178 (calculate_large_checksum(&trgt[trgt_start_pos..trgt_start_pos+9]),trgt_start_pos)
179 }else{
180 (0,max_fwd_hash_pos)
181 };
182 let table_win_effective = (src_len.next_power_of_two() >> 1).min(src_win_size);
184 let table = BasicHashTable::new(table_win_effective/l_step, false);
185 let mut matcher = SrcMatcher{
186 table, src_len, trgt_len, max_end_pos,max_fwd_hash_pos,
187 fwd_hash,
188 fwd_pos,
189 l_step,
190 half_win_size: src_win_size>>1,
191 cur_window_end: 0,
192 next_hash_pos: 0,
193 max_match_pos: trgt_start_pos,
194 };
195 add_start_positions_to_matcher(&mut matcher, trgt_start_pos, src);
197 matcher
198 }
199}
200
201#[inline]
202fn align(pos:usize,l_step:usize)->usize{
203 pos - (pos % l_step)
204}
205
206pub(crate) fn extend_src_match(src:&[u8],src_start:usize,trgt:&[u8],trgt_start:usize)->Option<(usize,usize)>{
209 let initial_match = src[src_start..src_start + 9]
211 .iter().zip(trgt[trgt_start..trgt_start + 9].iter())
212 .all(|(a,b)| a == b);
213 if !initial_match{
214 return None;
215 }
216 let min_offset = src_start.min(trgt_start);
218 let pre_match = if min_offset > 1 {
219 (1..min_offset).take_while(|&i| {
220 src[src_start - i] == trgt[trgt_start - i]
221 }).count()
222 }else{0};
223
224 let src_end = src_start + 9;
226 let trgt_end = trgt_start + 9;
227 let src_remain = src.len() - src_end;
228 let trgt_remain = trgt.len() - trgt_end;
229 let post_match = (0..src_remain.min(trgt_remain)).take_while(|&i| {
230 src[src_end + i] == trgt[trgt_end + i]
231 }).count();
232 Some((pre_match,post_match))
233}
234
235#[allow(dead_code)]
237fn calculate_default_win_size(src_len: usize, trgt_len: usize,max_win_size:Option<usize>) -> usize {
238 let mut win_size = (src_len).abs_diff(trgt_len).next_power_of_two();
239 if win_size <= MIN_SRC_WIN_SIZE {
240 win_size = win_size + MIN_SRC_WIN_SIZE;
241 }
242 let upper_bound = src_len.next_power_of_two().min(max_win_size.map(|a|a.next_power_of_two()).unwrap_or(DEFAULT_SRC_WIN_SIZE));
243 win_size.min(upper_bound)
244}
245
246#[inline]
247fn calculate_window_range(
248 cur_o_pos: usize,
249 src_len: usize,
250 trgt_len: usize,
251 half_win_size: usize,
252 max_end_pos: usize,
253 cur_window_end: usize,
254 l_step: usize,
255 max_match_end:usize,
256) -> Option<std::ops::Range<usize>> {
257 if cur_o_pos < half_win_size {
259 return Some(0..(half_win_size<<1).min(max_end_pos));
260 }else if cur_window_end >= max_end_pos{
261 return None;
262 }
263 let scaled_src_pos = cur_o_pos * src_len / trgt_len;
272
273 let scaled_end = align((scaled_src_pos + half_win_size).min(max_end_pos),l_step);
275 if scaled_end <= cur_window_end || scaled_end <= max_match_end {return None;
278 }
279 let max_diff_start = scaled_end.saturating_sub(half_win_size<<1);
283 let scaled_start = align(cur_window_end.max(max_match_end).max(max_diff_start),l_step);
284 Some(scaled_start..scaled_end)
285}
286
287#[cfg(test)]
288mod test_super {
289 use super::*;
290
291 #[test]
292 fn test_calculate_window_range_lrg_trgt() {
293 let half_win_size = 256;
294 let l_step = 2;
295 let src_len = 1000;
296 let trgt_len = 2000;
297 let max_end_pos = 994;
298
299 let result = calculate_window_range(10, src_len, trgt_len, half_win_size, max_end_pos, 0, l_step, 0);
301 assert_eq!(result.unwrap(), 0..512, "Initial fast forward: Incorrect range");
302
303 let result = calculate_window_range(10, src_len, trgt_len, 512, max_end_pos, 0, l_step, 0);
304 assert!(result.is_some(), "Initial fast forward: Expected Some(0..1024), but got {:?}", result);
305 assert_eq!(result.unwrap(), 0..max_end_pos, "Initial fast forward: Incorrect range");
306
307 let result = calculate_window_range(900, src_len, trgt_len, half_win_size, max_end_pos, 1000, l_step, 0);
309 assert!(result.is_none(), "End of file: Expected None, but got {:?}", result);
310
311 let result = calculate_window_range(500, src_len, trgt_len, half_win_size, max_end_pos, 700, l_step, 0);
313 assert!(result.is_none(), "Nothing to hash: Expected None, but got {:?}", result);
314
315 let result = calculate_window_range(1250, src_len, trgt_len, half_win_size, max_end_pos, 512, l_step, 0);
317 assert_eq!(result.unwrap(), 512..880, "Normal advancement: Incorrect range");
318
319 let result = calculate_window_range(400, src_len, trgt_len, half_win_size, max_end_pos, 300, l_step, 1000);
321 assert!(result.is_none(), "Scaled end too far: Expected None, but got {:?}", result);
322
323 let result = calculate_window_range(400, src_len, trgt_len, half_win_size, max_end_pos, 300, l_step, 700);
325 assert!(result.is_none(), "Max Match Exceeds Position: Expected None, but got {:?}", result);
326 }
327
328 #[test]
329 fn test_calculate_default_win_size_various_conditions() {
330 let src_len = 2_000_000;
332 let trgt_len = 100;
333 assert_eq!(calculate_default_win_size(src_len, trgt_len,None), src_len.next_power_of_two(), "test_large_source_small_target");
334
335 let src_len = 100;
337 let trgt_len = 2_000_000;
338 assert_eq!(calculate_default_win_size(src_len, trgt_len,None), src_len.next_power_of_two(), "test_small_source_large_target");
339
340 let src_len = 100_000_000;
342 let trgt_len = 90_000_000;
343 assert_eq!(calculate_default_win_size(src_len, trgt_len,None), 10_000_000usize.next_power_of_two(), "test_different_lengths");
344
345 let src_len = 100_000_000;
347 let trgt_len = (src_len - MIN_SRC_WIN_SIZE) + 10;
348 assert_eq!(calculate_default_win_size(src_len, trgt_len,None), (src_len - trgt_len).next_power_of_two() + MIN_SRC_WIN_SIZE, "test_diff_below_min_src_win_size");
349
350 let src_len = 100_000_000;
352 let trgt_len = 25_000_000;
353 assert_eq!(calculate_default_win_size(src_len, trgt_len,None), DEFAULT_SRC_WIN_SIZE, "test_source_below_min_src_win_size");
354 }
355
356}