use crate::compression::dict_zip::compression_types::{CompressionType, MAX_FAR3_LONG_DISTANCE};
use crate::error::{Result, ZiporaError};
use crate::hash_map::{ZiporaHashMap, fabo_hash_combine_u32, SimdStringOps};
use crate::memory::SecureMemoryPool;
#[cfg(test)]
use crate::memory::get_global_pool_for_size;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::cmp::{Ordering, min};
use std::collections::VecDeque;
use std::sync::Arc;
/// Default sliding-window capacity in bytes (64 KiB).
pub const DEFAULT_WINDOW_SIZE: usize = 64 * 1024;
/// Default number of hash-chain entries examined per pattern search.
pub const DEFAULT_MAX_PROBE_DISTANCE: usize = 8;
/// Default shortest match length that is reported.
pub const DEFAULT_MIN_MATCH_LENGTH: usize = 3;
/// Default longest match length that is searched for.
pub const DEFAULT_MAX_MATCH_LENGTH: usize = 258;
/// Default nominal hash-table capacity; in this file it is only used as the
/// denominator of the reported load factor.
pub const DEFAULT_HASH_TABLE_CAPACITY: usize = 4096;
/// Maximum number of entries kept in one hash chain; also the upper bound
/// accepted for `max_probe_distance` during configuration validation.
pub const MAX_CHAIN_LENGTH: usize = 16;
/// Number of bytes hashed to form a hash-table key.
pub const HASH_PATTERN_LENGTH: usize = 4;
/// A single back-reference candidate found by the local matcher, together
/// with the derived encoding type and scoring values.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LocalMatch {
/// Number of matching bytes.
pub length: usize,
/// Distance between the input position and the history position.
pub distance: usize,
/// Position in the input where the match starts.
pub input_position: usize,
/// Position in the history where the matched data starts.
pub history_position: usize,
/// Encoding type derived from `distance` and `length`.
pub compression_type: CompressionType,
/// Heuristic score, clamped to `[0.0, 1.0]`; higher is better.
pub quality: f64,
/// `length` minus the estimated encoding cost of `compression_type`;
/// may be negative.
pub compression_benefit: isize,
}
impl LocalMatch {
pub fn new(
length: usize,
distance: usize,
input_position: usize,
history_position: usize,
) -> Self {
let compression_type = Self::determine_compression_type(distance, length);
let quality = Self::calculate_quality(length, distance, compression_type);
let compression_benefit = Self::calculate_compression_benefit(length, compression_type);
Self {
length,
distance,
input_position,
history_position,
compression_type,
quality,
compression_benefit,
}
}
fn determine_compression_type(distance: usize, length: usize) -> CompressionType {
if distance == 1 && length >= 2 && length <= 33 {
CompressionType::RLE
} else if distance >= 2 && distance <= 9 && length >= 2 && length <= 5 {
CompressionType::NearShort
} else if distance >= 2 && distance <= 257 && length >= 2 && length <= 33 {
CompressionType::Far1Short
} else if distance >= 258 && distance <= 65793 && length >= 2 && length <= 33 {
CompressionType::Far2Short
} else if distance <= 65535 && length >= 34 {
CompressionType::Far2Long
} else if distance <= MAX_FAR3_LONG_DISTANCE && length >= 34 {
CompressionType::Far3Long
} else {
CompressionType::Far1Short
}
}
fn calculate_quality(length: usize, distance: usize, compression_type: CompressionType) -> f64 {
let length_quality = 1.0 - (-(length as f64) / 50.0).exp();
let distance_penalty = match compression_type {
CompressionType::RLE => 0.0, CompressionType::NearShort => 0.05,
CompressionType::Far1Short => 0.1,
CompressionType::Far2Short => 0.15,
CompressionType::Far2Long => 0.1, CompressionType::Far3Long => 0.12,
_ => 0.2, };
let type_bonus = match compression_type {
CompressionType::RLE => 0.2, CompressionType::NearShort => 0.1,
CompressionType::Far1Short => 0.05,
CompressionType::Far2Short => 0.0,
CompressionType::Far2Long => 0.15, CompressionType::Far3Long => 0.1,
_ => 0.0,
};
(length_quality - distance_penalty + type_bonus).clamp(0.0, 1.0)
}
fn calculate_compression_benefit(length: usize, compression_type: CompressionType) -> isize {
let encoding_cost = match compression_type {
CompressionType::RLE => 2, CompressionType::NearShort => 2, CompressionType::Far1Short => 3, CompressionType::Far2Short => 4, CompressionType::Far2Long => 4, CompressionType::Far3Long => 5, _ => 4, };
length as isize - encoding_cost
}
pub fn is_better_than(&self, other: &LocalMatch) -> bool {
match self.compression_benefit.cmp(&other.compression_benefit) {
Ordering::Greater => true,
Ordering::Equal => {
match self.length.cmp(&other.length) {
Ordering::Greater => true,
Ordering::Equal => {
self.quality > other.quality
}
Ordering::Less => false,
}
}
Ordering::Less => false,
}
}
}
/// One recorded occurrence of a hashed pattern, stored in a hash chain.
#[derive(Debug, Clone, Copy)]
struct ChainEntry {
    /// Absolute position at which the hashed pattern starts.
    position: usize,
    /// Hash of the pattern that produced this entry.
    pattern_hash: u32,
    /// Number of bytes that were available from the pattern start when the
    /// entry was recorded.
    available_length: usize,
}

impl ChainEntry {
    /// Bundles the three values into an entry; no validation is performed.
    fn new(position: usize, pattern_hash: u32, available_length: usize) -> Self {
        ChainEntry {
            available_length,
            pattern_hash,
            position,
        }
    }
}
/// Tunable parameters of the local matcher.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LocalMatcherConfig {
/// Maximum number of bytes kept in the sliding window.
pub window_size: usize,
/// Maximum number of hash-chain entries examined per pattern search.
pub max_probe_distance: usize,
/// Shortest match length that is reported.
pub min_match_length: usize,
/// Longest match length that is searched for.
pub max_match_length: usize,
/// Nominal hash-table capacity; within this file it only feeds the
/// load-factor statistic.
pub hash_table_capacity: usize,
/// Enables the SIMD comparison path (only effective when the crate is
/// built with the `simd` feature).
pub enable_simd: bool,
/// Upper bound on the number of matches returned by one search.
pub max_matches_per_search: usize,
/// Enables detection of byte runs (distance-1 matches).
pub enable_rle_detection: bool,
/// Shortest byte run that is reported as an RLE match.
pub min_rle_length: usize,
}
impl Default for LocalMatcherConfig {
fn default() -> Self {
Self {
window_size: DEFAULT_WINDOW_SIZE,
max_probe_distance: DEFAULT_MAX_PROBE_DISTANCE,
min_match_length: DEFAULT_MIN_MATCH_LENGTH,
max_match_length: DEFAULT_MAX_MATCH_LENGTH,
hash_table_capacity: DEFAULT_HASH_TABLE_CAPACITY,
enable_simd: cfg!(feature = "simd"),
max_matches_per_search: 8,
enable_rle_detection: true,
min_rle_length: 3,
}
}
}
impl LocalMatcherConfig {
pub fn validate(&self) -> Result<()> {
if self.window_size == 0 {
return Err(ZiporaError::invalid_data("Window size must be > 0"));
}
if self.window_size > 16 * 1024 * 1024 {
return Err(ZiporaError::invalid_data("Window size too large (max 16MB)"));
}
if self.max_probe_distance == 0 {
return Err(ZiporaError::invalid_data("Max probe distance must be > 0"));
}
if self.max_probe_distance > MAX_CHAIN_LENGTH {
return Err(ZiporaError::invalid_data(
format!("Max probe distance must be <= {}", MAX_CHAIN_LENGTH).as_str()
));
}
if self.min_match_length == 0 {
return Err(ZiporaError::invalid_data("Min match length must be > 0"));
}
if self.max_match_length < self.min_match_length {
return Err(ZiporaError::invalid_data(
"Max match length must be >= min match length"
));
}
if self.max_match_length > 65536 {
return Err(ZiporaError::invalid_data("Max match length too large (max 64KB)"));
}
Ok(())
}
pub fn fast_compression() -> Self {
Self {
window_size: 32 * 1024, max_probe_distance: 4,
min_match_length: 4,
max_match_length: 64,
max_matches_per_search: 4,
..Default::default()
}
}
pub fn max_compression() -> Self {
Self {
window_size: 256 * 1024, max_probe_distance: 16,
min_match_length: 3,
max_match_length: 512,
max_matches_per_search: 16,
..Default::default()
}
}
pub fn realtime() -> Self {
Self {
window_size: 16 * 1024, max_probe_distance: 2,
min_match_length: 4,
max_match_length: 32,
max_matches_per_search: 2,
enable_simd: true,
..Default::default()
}
}
}
/// Counters and running averages collected by the local matcher.
#[derive(Debug, Clone, Default)]
pub struct LocalMatcherStats {
    /// Number of bytes fed into the window.
    pub bytes_added: u64,
    /// Number of searches started.
    pub searches_performed: u64,
    /// Total number of matches returned across all searches.
    pub matches_found: u64,
    /// Number of searches whose pattern hash was present in the hash table.
    pub hash_collisions: u64,
    /// Number of byte-wise candidate comparisons performed.
    pub string_comparisons: u64,
    /// Accumulated time spent in SIMD comparisons, in microseconds.
    pub simd_time_us: u64,
    /// Running average of match lengths.
    pub avg_match_length: f64,
    /// Hash-table entry count divided by the configured capacity.
    pub hash_table_load_factor: f64,
    /// Number of chain entries dropped because a chain was full.
    pub entries_evicted: u64,
}

impl LocalMatcherStats {
    /// Returns `matches_found / searches_performed`, or `0.0` when no search
    /// has been performed.
    pub fn match_success_ratio(&self) -> f64 {
        match self.searches_performed {
            0 => 0.0,
            searches => self.matches_found as f64 / searches as f64,
        }
    }

    /// Returns `matches_found / hash_collisions`.
    ///
    /// With zero collisions the result is `0.0` when nothing was found and
    /// `f64::INFINITY` otherwise.
    pub fn search_efficiency(&self) -> f64 {
        match (self.hash_collisions, self.matches_found) {
            (0, 0) => 0.0,
            (0, _) => f64::INFINITY,
            (collisions, found) => found as f64 / collisions as f64,
        }
    }
}
/// Sliding-window matcher that indexes recent bytes by a hash of
/// `HASH_PATTERN_LENGTH`-byte patterns and reports back-reference candidates.
#[derive(Clone)]
pub struct LocalMatcher {
// Validated configuration supplied at construction.
config: LocalMatcherConfig,
// Most recent bytes, oldest at the front.
window: VecDeque<u8>,
// Absolute position of the most recently added byte.
current_position: usize,
// Pattern hash -> chain of recorded occurrences (oldest first).
hash_table: ZiporaHashMap<u32, Vec<ChainEntry>>,
// NOTE(review): stored at construction but not read by any method visible in this file.
simd_ops: Arc<SimdStringOps>,
// NOTE(review): stored at construction but not read by any method visible in this file.
memory_pool: Arc<SecureMemoryPool>,
// Counters updated by add/search operations.
stats: LocalMatcherStats,
}
impl LocalMatcher {
pub fn new(config: LocalMatcherConfig, memory_pool: Arc<SecureMemoryPool>) -> Result<Self> {
config.validate()?;
let hash_table = ZiporaHashMap::new()?;
let simd_ops = Arc::new(SimdStringOps::new());
let window_size = config.window_size;
Ok(Self {
config,
window: VecDeque::with_capacity(window_size),
current_position: 0,
hash_table,
simd_ops,
memory_pool,
stats: LocalMatcherStats::default(),
})
}
/// Appends one byte at absolute `position` to the sliding window and indexes
/// the pattern that ends at it.
///
/// When the window is already full, the oldest byte is dropped first and
/// hash chains that only reference evicted positions are removed.
///
/// # Errors
///
/// Propagates errors from hash-table maintenance.
pub fn add_byte(&mut self, byte: u8, position: usize) -> Result<()> {
    self.current_position = position;

    if self.window.len() >= self.config.window_size {
        self.window.pop_front().ok_or_else(|| {
            ZiporaError::invalid_data("Window unexpectedly empty during byte removal")
        })?;
        // A caller may pass a position smaller than the window size (for
        // example after restarting position numbering). A plain subtraction
        // would panic in debug builds and wrap in release builds, which would
        // make the cleanup drop every chain.
        let evicted_position = position.saturating_sub(self.config.window_size);
        self.cleanup_hash_table_entry(evicted_position)?;
    }

    self.window.push_back(byte);
    self.stats.bytes_added += 1;
    self.add_hash_table_entries(position)?;
    Ok(())
}
/// Feeds `bytes` into the window one at a time; byte `i` is assigned the
/// absolute position `start_position + i`.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by `add_byte`.
pub fn add_bytes(&mut self, bytes: &[u8], start_position: usize) -> Result<()> {
    bytes
        .iter()
        .enumerate()
        .try_for_each(|(offset, &value)| self.add_byte(value, start_position + offset))
}
/// Searches for matches of `input[input_pos..]` against the indexed history.
///
/// At most `max_search_length` bytes (further capped by the configured
/// `max_match_length`) are considered. The result combines an optional RLE
/// match with hash-chain pattern matches, sorted by descending quality and
/// then descending compression benefit, and truncated to
/// `max_matches_per_search`.
///
/// Returns an empty vector when `input_pos` is past the end of `input` or
/// when fewer than `min_match_length` bytes are available.
///
/// # Errors
///
/// Propagates errors from the underlying comparison routines.
pub fn find_matches(
    &mut self,
    input: &[u8],
    input_pos: usize,
    max_search_length: usize,
) -> Result<Vec<LocalMatch>> {
    self.stats.searches_performed += 1;
    if input_pos >= input.len() {
        return Ok(Vec::new());
    }

    // `max_search_length` may be `usize::MAX` ("no limit"); saturate instead
    // of overflowing.
    let search_end = min(input_pos.saturating_add(max_search_length), input.len());
    let pattern_len = min(search_end - input_pos, self.config.max_match_length);
    if pattern_len < self.config.min_match_length {
        return Ok(Vec::new());
    }

    let mut matches = Vec::new();
    if self.config.enable_rle_detection {
        if let Some(rle_match) = self.find_rle_match(input, input_pos, pattern_len)? {
            matches.push(rle_match);
        }
    }
    matches.extend(self.find_pattern_matches(input, input_pos, pattern_len)?);

    // Stable sort: best quality first, ties broken by larger benefit.
    matches.sort_by(|a, b| {
        b.quality
            .partial_cmp(&a.quality)
            .unwrap_or(Ordering::Equal)
            .then_with(|| b.compression_benefit.cmp(&a.compression_benefit))
    });
    matches.truncate(self.config.max_matches_per_search);

    self.stats.matches_found += matches.len() as u64;
    if !matches.is_empty() {
        let total_length: usize = matches.iter().map(|m| m.length).sum();
        let avg_length = total_length as f64 / matches.len() as f64;
        // Running mean over all searches performed so far.
        self.stats.avg_match_length = (self.stats.avg_match_length
            * (self.stats.searches_performed - 1) as f64
            + avg_length)
            / self.stats.searches_performed as f64;
    }
    Ok(matches)
}
/// Detects a run of the byte at `input[input_pos]` that continues the
/// previous byte.
///
/// Returns a distance-1 match when the byte equals its predecessor and the
/// run starting at `input_pos` (limited to `max_length` bytes) is at least
/// `min_rle_length` long; otherwise `None`.
fn find_rle_match(
    &self,
    input: &[u8],
    input_pos: usize,
    max_length: usize,
) -> Result<Option<LocalMatch>> {
    if input_pos == 0 || input_pos >= input.len() {
        return Ok(None);
    }

    let run_byte = input[input_pos];
    if input[input_pos - 1] != run_byte {
        return Ok(None);
    }

    // Bytes after the current one that may still belong to the run; empty
    // when the scan limit does not reach past `input_pos`.
    let scan_end = min(input_pos + max_length, input.len());
    let tail = input.get(input_pos + 1..scan_end).unwrap_or(&[]);
    let run_length = 1 + tail.iter().take_while(|&&b| b == run_byte).count();

    if run_length < self.config.min_rle_length {
        return Ok(None);
    }
    Ok(Some(LocalMatch::new(run_length, 1, input_pos, input_pos - 1)))
}
/// Looks up the `HASH_PATTERN_LENGTH`-byte pattern at `input_pos` in the
/// hash table and verifies the first `max_probe_distance` chain entries.
///
/// Entries are skipped when they are no longer inside the window, do not lie
/// strictly before `input_pos`, are farther away than
/// `MAX_FAR3_LONG_DISTANCE`, or carry a different hash. Every verified
/// candidate of at least `min_match_length` bytes becomes a `LocalMatch`.
fn find_pattern_matches(
    &mut self,
    input: &[u8],
    input_pos: usize,
    max_length: usize,
) -> Result<Vec<LocalMatch>> {
    let pattern_end = input_pos + HASH_PATTERN_LENGTH;
    if pattern_end > input.len() {
        return Ok(Vec::new());
    }
    let pattern_hash = self.hash_pattern(&input[input_pos..pattern_end]);

    // Work on a copy of the chain so `self` can be mutably borrowed below.
    let candidates = match self.hash_table.get(&pattern_hash) {
        Some(chain) => chain.clone(),
        None => return Ok(Vec::new()),
    };
    self.stats.hash_collisions += 1;

    let mut found = Vec::new();
    for entry in candidates.iter().take(self.config.max_probe_distance) {
        let unusable = self.get_window_position(entry.position).is_none()
            || entry.position >= input_pos
            || input_pos - entry.position > MAX_FAR3_LONG_DISTANCE
            || entry.pattern_hash != pattern_hash;
        if unusable {
            continue;
        }

        let distance = input_pos - entry.position;
        let match_length =
            self.compare_strings_and_find_length(input, input_pos, entry.position, max_length)?;
        if match_length >= self.config.min_match_length {
            found.push(LocalMatch::new(
                match_length,
                distance,
                input_pos,
                entry.position,
            ));
        }
        self.stats.string_comparisons += 1;
    }
    Ok(found)
}
/// Hashes up to the first four bytes of `pattern`.
///
/// The bytes are packed little-endian into a `u32` (missing high bytes are
/// zero for patterns shorter than four bytes) and mixed with the constant
/// `0x9e3779b9` via `fabo_hash_combine_u32`.
fn hash_pattern(&self, pattern: &[u8]) -> u32 {
    let word = pattern
        .iter()
        .take(4)
        .enumerate()
        .fold(0u32, |acc, (i, &byte)| acc | ((byte as u32) << (i * 8)));
    fabo_hash_combine_u32(word, 0x9e3779b9)
}
/// Returns the number of leading bytes of `input[input_pos..]` that equal the
/// window contents starting at absolute position `history_pos`.
///
/// The comparison covers at most `max_length` bytes and never reads past the
/// end of the window. Returns `0` when `history_pos` is no longer (or not
/// yet) inside the window.
fn compare_strings_and_find_length(
    &mut self,
    input: &[u8],
    input_pos: usize,
    history_pos: usize,
    max_length: usize,
) -> Result<usize> {
    let input_slice = &input[input_pos..min(input_pos + max_length, input.len())];
    let window_idx = match self.get_window_position(history_pos) {
        Some(idx) => idx,
        None => return Ok(0),
    };

    // Clamp to the bytes actually available in the window. Rejecting the
    // candidate outright whenever fewer than `max_length` history bytes
    // remain would discard valid, shorter matches near the window tail.
    // `window_idx` is always < `window.len()` here.
    let compare_len = min(max_length, self.window.len() - window_idx);
    if compare_len == 0 {
        return Ok(0);
    }

    let history_slice = {
        let (first, second) = self.window.as_slices();
        if window_idx + compare_len <= first.len() {
            &first[window_idx..window_idx + compare_len]
        } else if window_idx >= first.len() {
            let second_idx = window_idx - first.len();
            &second[second_idx..second_idx + compare_len]
        } else {
            // The candidate straddles the ring buffer's wrap point.
            return self.compare_strings_across_boundary(input_slice, window_idx, compare_len);
        }
    };

    #[cfg(feature = "simd")]
    if self.config.enable_simd && compare_len >= 16 {
        let start_time = std::time::Instant::now();
        let result = self.simd_compare_and_find_length(input_slice, history_slice, compare_len);
        self.stats.simd_time_us += start_time.elapsed().as_micros() as u64;
        return Ok(result);
    }
    Ok(self.scalar_compare_and_find_length(input_slice, history_slice))
}
/// Counts matching leading bytes when the history region may span both
/// halves of the ring buffer.
///
/// `window_idx` is a logical index into the window; at most
/// `min(input_slice.len(), max_length)` bytes are compared.
fn compare_strings_across_boundary(
    &self,
    input_slice: &[u8],
    window_idx: usize,
    max_length: usize,
) -> Result<usize> {
    let search_len = min(input_slice.len(), max_length);
    if search_len == 0 {
        return Ok(0);
    }

    let (first, second) = self.window.as_slices();
    let mut matched = 0;
    let mut consumed = 0;
    let second_start;

    if window_idx < first.len() {
        // Part (or all) of the region lies in the first half.
        let span = min(first.len() - window_idx, search_len);
        let agree = first[window_idx..window_idx + span]
            .iter()
            .zip(&input_slice[..span])
            .take_while(|(h, i)| h == i)
            .count();
        if agree < span {
            return Ok(agree);
        }
        matched = span;
        consumed = span;
        second_start = 0;
    } else {
        second_start = window_idx - first.len();
    }

    if consumed < search_len && !second.is_empty() {
        let span = min(second.len() - second_start, search_len - consumed);
        if span > 0 {
            let agree = second[second_start..second_start + span]
                .iter()
                .zip(&input_slice[consumed..consumed + span])
                .take_while(|(h, i)| h == i)
                .count();
            matched += agree;
        }
    }
    Ok(matched)
}
/// Returns the length of the common prefix of `input` and `history`, capped
/// at `max_length`.
///
/// On x86_64 the prefix is scanned 16 bytes at a time with SSE2; remaining
/// bytes, and every byte on other architectures, are compared one by one.
#[cfg(feature = "simd")]
fn simd_compare_and_find_length(&self, input: &[u8], history: &[u8], max_length: usize) -> usize {
    let max_len = min(min(input.len(), history.len()), max_length);
    let mut pos = 0;

    // Only x86_64 has the intrinsics used below; other targets fall through
    // to the scalar loop so the `simd` feature still builds there.
    #[cfg(target_arch = "x86_64")]
    {
        use std::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8};

        while pos + 16 <= max_len {
            // SAFETY: `pos + 16 <= max_len <= min(input.len(), history.len())`,
            // so both unaligned 16-byte loads stay inside their slices. SSE2
            // is part of the x86_64 baseline, so the intrinsics are available.
            let mask = unsafe {
                let input_chunk = _mm_loadu_si128(input.as_ptr().add(pos) as *const __m128i);
                let history_chunk = _mm_loadu_si128(history.as_ptr().add(pos) as *const __m128i);
                _mm_movemask_epi8(_mm_cmpeq_epi8(input_chunk, history_chunk))
            } as u16;

            if mask != 0xFFFF {
                // Bit i is set when byte i matched; the run of low set bits
                // is the number of matching bytes in this chunk.
                return pos + mask.trailing_ones() as usize;
            }
            pos += 16;
        }
    }

    while pos < max_len && input[pos] == history[pos] {
        pos += 1;
    }
    pos
}
fn scalar_compare_and_find_length(&self, input: &[u8], history: &[u8]) -> usize {
let max_len = min(input.len(), history.len());
for i in 0..max_len {
if input[i] != history[i] {
return i;
}
}
max_len
}
/// Indexes the `HASH_PATTERN_LENGTH`-byte pattern that ends at the byte just
/// appended at absolute `position`.
///
/// Does nothing until enough bytes are available. When the target chain
/// already holds `MAX_CHAIN_LENGTH` entries, its oldest entry is dropped
/// first. The load-factor statistic is refreshed afterwards.
fn add_hash_table_entries(&mut self, position: usize) -> Result<()> {
    if self.window.len() < HASH_PATTERN_LENGTH || position + 1 < HASH_PATTERN_LENGTH {
        return Ok(());
    }

    // Copy the trailing pattern into a stack buffer. This runs once per
    // input byte, so a heap allocation here would be pure overhead.
    let pattern_start = self.window.len() - HASH_PATTERN_LENGTH;
    let mut pattern = [0u8; HASH_PATTERN_LENGTH];
    for (dst, &src) in pattern.iter_mut().zip(self.window.range(pattern_start..)) {
        *dst = src;
    }
    let pattern_hash = self.hash_pattern(&pattern);

    let entry = ChainEntry::new(
        position + 1 - HASH_PATTERN_LENGTH,
        pattern_hash,
        // Bytes available from the pattern start to the window end.
        self.window.len() - pattern_start,
    );

    if self.hash_table.get(&pattern_hash).is_none() {
        self.hash_table.insert(pattern_hash, Vec::new());
    }
    let chain = self
        .hash_table
        .get_mut(&pattern_hash)
        .ok_or_else(|| ZiporaError::invalid_data("Hash table entry unexpectedly missing"))?;
    if chain.len() >= MAX_CHAIN_LENGTH {
        // Chains are ordered oldest-first; drop the oldest entry.
        chain.remove(0);
        self.stats.entries_evicted += 1;
    }
    chain.push(entry);

    self.stats.hash_table_load_factor =
        self.hash_table.len() as f64 / self.config.hash_table_capacity as f64;
    Ok(())
}
/// Removes every hash chain whose entries all lie at or before
/// `evicted_position`.
///
/// Chains that still contain at least one newer entry are kept unchanged,
/// including their stale entries; those are filtered out at lookup time.
fn cleanup_hash_table_entry(&mut self, evicted_position: usize) -> Result<()> {
    // Collect keys first: the table cannot be mutated while it is iterated.
    let stale_keys: Vec<u32> = self
        .hash_table
        .iter()
        .filter_map(|(hash, chain)| {
            chain
                .iter()
                .all(|entry| entry.position <= evicted_position)
                .then(|| *hash)
        })
        .collect();

    for key in stale_keys {
        self.hash_table.remove(&key);
    }
    Ok(())
}
/// Translates an absolute stream position into an index into the window.
///
/// Returns `None` when the position is newer than the most recently added
/// byte or has already been evicted from the window.
fn get_window_position(&self, absolute_position: usize) -> Option<usize> {
    let window_len = self.window.len();
    self.current_position
        .checked_sub(absolute_position)
        .filter(|&age| age < window_len)
        .map(|age| window_len - age - 1)
}
/// Returns the statistics collected so far.
pub fn stats(&self) -> &LocalMatcherStats {
&self.stats
}
/// Resets all statistics to their default (zero) values.
pub fn reset_stats(&mut self) {
self.stats = LocalMatcherStats::default();
}
/// Returns the configuration the matcher was created with.
pub fn config(&self) -> &LocalMatcherConfig {
&self.config
}
/// Returns the number of bytes currently held in the window (not the
/// configured capacity).
pub fn window_size(&self) -> usize {
self.window.len()
}
/// Returns `true` once the window holds at least `config.window_size` bytes.
pub fn is_window_full(&self) -> bool {
self.window.len() >= self.config.window_size
}
/// Empties the window and hash table, resets the current position to 0 and
/// clears the statistics. The configuration is kept.
pub fn clear(&mut self) {
self.window.clear();
self.hash_table.clear();
self.current_position = 0;
self.reset_stats();
}
/// Checks internal invariants.
///
/// # Errors
///
/// Returns an invalid-data error when the window holds more bytes than the
/// configured size or when any hash chain exceeds `MAX_CHAIN_LENGTH`.
pub fn validate(&self) -> Result<()> {
    if self.window.len() > self.config.window_size {
        return Err(ZiporaError::invalid_data("Window size exceeds configuration limit"));
    }

    let has_oversized_chain = self
        .hash_table
        .iter()
        .any(|(_, chain)| chain.len() > MAX_CHAIN_LENGTH);
    if has_oversized_chain {
        return Err(ZiporaError::invalid_data("Hash chain exceeds maximum length"));
    }
    Ok(())
}
/// Returns the longest match for the start of `remaining` whose distance
/// does not exceed `max_probe_distance`, or `None` when there is none.
///
/// Delegates to `find_matches(remaining, 0, max_length)`; among equally long
/// candidates the last one in that result wins.
///
/// NOTE(review): because the search always runs with `input_pos == 0`, the
/// RLE detector bails out (it needs a preceding byte) and every hash-chain
/// entry is rejected by the `entry.position >= input_pos` check, so this
/// method currently appears to always yield `Ok(None)`. Fixing that needs
/// the absolute position of `remaining[0]` — confirm intended semantics
/// with callers.
pub fn find_match(
    &mut self,
    remaining: &[u8],
    max_probe_distance: usize,
    max_length: usize,
) -> Result<Option<LocalMatch>> {
    if remaining.is_empty() {
        return Ok(None);
    }

    let mut best: Option<LocalMatch> = None;
    for candidate in self.find_matches(remaining, 0, max_length)? {
        if candidate.distance > max_probe_distance {
            continue;
        }
        // `>=` keeps the later candidate on ties.
        let replaces_best = match &best {
            Some(current) => candidate.length >= current.length,
            None => true,
        };
        if replaces_best {
            best = Some(candidate);
        }
    }
    Ok(best)
}
}
// Unit tests for the local matcher, its configuration and its match records.
#[cfg(test)]
mod tests {
use super::*;
// Builds a matcher with a 1 KiB window, probe limit 4 and match lengths 3..=64.
fn create_test_matcher() -> LocalMatcher {
let config = LocalMatcherConfig {
window_size: 1024,
max_probe_distance: 4,
min_match_length: 3,
max_match_length: 64,
..Default::default()
};
let pool = get_global_pool_for_size(1024).clone();
LocalMatcher::new(config, pool).unwrap()
}
// A fresh matcher reports its configuration and an empty window.
#[test]
fn test_local_matcher_creation() {
let matcher = create_test_matcher();
assert_eq!(matcher.config().window_size, 1024);
assert_eq!(matcher.config().max_probe_distance, 4);
assert_eq!(matcher.window_size(), 0);
assert!(!matcher.is_window_full());
}
// Every added byte grows the window and is counted in the statistics.
#[test]
fn test_add_bytes() {
let mut matcher = create_test_matcher();
let data = b"hello world hello";
for (i, &byte) in data.iter().enumerate() {
matcher.add_byte(byte, i).unwrap();
}
assert_eq!(matcher.window_size(), data.len());
assert_eq!(matcher.stats().bytes_added, data.len() as u64);
}
// Feeding 16 bytes through an 8-byte window must retain only the last 8.
#[test]
fn test_sliding_window_overflow() {
let mut matcher = LocalMatcher::new(
LocalMatcherConfig {
window_size: 8,
..Default::default()
},
get_global_pool_for_size(1024).clone()
).unwrap();
let data = b"0123456789abcdef";
for (i, &byte) in data.iter().enumerate() {
matcher.add_byte(byte, i).unwrap();
}
assert_eq!(matcher.window_size(), 8);
assert!(matcher.is_window_full());
let window_data: Vec<u8> = matcher.window.iter().copied().collect();
assert_eq!(window_data, b"89abcdef");
}
// A run of identical bytes is reported as a distance-1 RLE match.
#[test]
fn test_rle_detection() {
let mut matcher = create_test_matcher();
let data = b"abcaaaa";
for (i, &byte) in data.iter().enumerate() {
matcher.add_byte(byte, i).unwrap();
}
let matches = matcher.find_matches(data, 4, 10).unwrap();
assert!(!matches.is_empty());
let rle_match = &matches[0];
assert_eq!(rle_match.compression_type, CompressionType::RLE);
assert_eq!(rle_match.distance, 1);
assert!(rle_match.length >= 3); }
// A repeated "abc" pattern is found through the hash chains, not as RLE.
#[test]
fn test_pattern_matching() {
let mut matcher = create_test_matcher();
let data = b"abcabcabc";
for (i, &byte) in data.iter().enumerate() {
matcher.add_byte(byte, i).unwrap();
}
let matches = matcher.find_matches(data, 3, 6).unwrap();
assert!(!matches.is_empty(), "Should find pattern match in 'abcabcabc'");
let best_match = &matches[0];
assert!(best_match.length >= 3); assert!(best_match.distance > 0 && best_match.distance <= 6); assert_ne!(best_match.compression_type, CompressionType::RLE);
}
// Spot-checks the (length, distance) -> compression type mapping.
#[test]
fn test_compression_type_determination() {
let rle_match = LocalMatch::new(5, 1, 10, 9);
assert_eq!(rle_match.compression_type, CompressionType::RLE);
let near_match = LocalMatch::new(4, 5, 10, 5);
assert_eq!(near_match.compression_type, CompressionType::NearShort);
let far1_match = LocalMatch::new(10, 100, 10, 0);
assert_eq!(far1_match.compression_type, CompressionType::Far1Short);
let far2_long_match = LocalMatch::new(50, 1000, 100, 50);
assert_eq!(far2_long_match.compression_type, CompressionType::Far2Long);
}
// At equal distance, the longer match scores higher and is preferred.
#[test]
fn test_match_quality_calculation() {
let short_match = LocalMatch::new(3, 5, 0, 0);
let long_match = LocalMatch::new(20, 5, 0, 0);
assert!(long_match.quality > short_match.quality);
assert!(long_match.is_better_than(&short_match));
}
// The default configuration is valid; zero window size and min > max are rejected.
#[test]
fn test_configuration_validation() {
let valid_config = LocalMatcherConfig::default();
assert!(valid_config.validate().is_ok());
let invalid_config = LocalMatcherConfig {
window_size: 0,
..Default::default()
};
assert!(invalid_config.validate().is_err());
let invalid_config = LocalMatcherConfig {
min_match_length: 10,
max_match_length: 5,
..Default::default()
};
assert!(invalid_config.validate().is_err());
}
// Each preset exposes its documented window size and probe limit.
#[test]
fn test_preset_configurations() {
let fast = LocalMatcherConfig::fast_compression();
assert_eq!(fast.window_size, 32 * 1024);
assert_eq!(fast.max_probe_distance, 4);
let max_comp = LocalMatcherConfig::max_compression();
assert_eq!(max_comp.window_size, 256 * 1024);
assert_eq!(max_comp.max_probe_distance, 16);
let realtime = LocalMatcherConfig::realtime();
assert_eq!(realtime.window_size, 16 * 1024);
assert_eq!(realtime.max_probe_distance, 2);
}
// Hashing is deterministic and distinguishes patterns that differ in case.
#[test]
fn test_hash_pattern() {
let matcher = create_test_matcher();
let pattern1 = b"test";
let pattern2 = b"test";
let pattern3 = b"TEST";
let hash1 = matcher.hash_pattern(pattern1);
let hash2 = matcher.hash_pattern(pattern2);
let hash3 = matcher.hash_pattern(pattern3);
assert_eq!(hash1, hash2); assert_ne!(hash1, hash3); }
// Byte and search counters track the operations performed.
#[test]
fn test_statistics_tracking() {
let mut matcher = create_test_matcher();
let data = b"test data test";
for (i, &byte) in data.iter().enumerate() {
matcher.add_byte(byte, i).unwrap();
}
let _ = matcher.find_matches(data, 10, 5).unwrap();
let _ = matcher.find_matches(data, 5, 8).unwrap();
let stats = matcher.stats();
assert_eq!(stats.bytes_added, data.len() as u64);
assert_eq!(stats.searches_performed, 2);
assert!(stats.match_success_ratio() >= 0.0 && stats.match_success_ratio() <= 1.0);
}
// `clear` empties the window and zeroes the statistics.
#[test]
fn test_clear_and_reset() {
let mut matcher = create_test_matcher();
let data = b"some test data";
for (i, &byte) in data.iter().enumerate() {
matcher.add_byte(byte, i).unwrap();
}
assert!(matcher.window_size() > 0);
assert!(matcher.stats().bytes_added > 0);
matcher.clear();
assert_eq!(matcher.window_size(), 0);
assert_eq!(matcher.stats().bytes_added, 0);
assert!(!matcher.is_window_full());
}
// A freshly created matcher satisfies its internal invariants.
#[test]
fn test_validation() {
let matcher = create_test_matcher();
assert!(matcher.validate().is_ok());
}
}