use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
/// Outcome of a change-ratio estimate, together with the delta/no-delta decision
/// derived from it.
#[derive(Debug, Clone)]
pub struct ChangeRatioResult {
    /// Estimated changed fraction (`change_ratio_percent` renders it multiplied by 100).
    pub change_ratio: f64,
    /// Number of blocks that were sampled to produce the estimate.
    pub blocks_sampled: usize,
    /// Number of sampled blocks that were found to differ.
    pub blocks_changed: usize,
    /// `true` when `change_ratio` does not exceed `threshold`.
    pub use_delta: bool,
    /// The cut-off that `change_ratio` was compared against.
    pub threshold: f64,
}

impl ChangeRatioResult {
    /// Builds a result and derives `use_delta` from the inputs.
    ///
    /// `use_delta` is set when `change_ratio <= threshold`; a ratio exactly equal to the
    /// threshold therefore still selects the delta path.
    pub fn new(
        change_ratio: f64,
        blocks_sampled: usize,
        blocks_changed: usize,
        threshold: f64,
    ) -> Self {
        Self {
            change_ratio,
            blocks_sampled,
            blocks_changed,
            use_delta: change_ratio <= threshold,
            threshold,
        }
    }

    /// Renders the change ratio as a percentage with one decimal place, e.g. `"12.5%"`.
    pub fn change_ratio_percent(&self) -> String {
        let percent = self.change_ratio * 100.0;
        format!("{:.1}%", percent)
    }
}
/// Estimates what fraction of `dest` differs from `source` by comparing a sample of
/// fixed-size blocks read from the same offsets in both files.
///
/// The two file sizes are compared first. If `dest` is empty, or the sizes differ by more
/// than 50% of `dest`'s size, no blocks are read: the size difference (capped at `1.0`) is
/// returned as the change ratio with zero blocks sampled. Otherwise the sampled blocks are
/// spread evenly from the first to the last block of `dest` and compared byte for byte.
/// Only offsets within `dest`'s length are sampled.
///
/// # Arguments
///
/// * `source` - path of the file holding the new content.
/// * `dest` - path of the file it is compared against.
/// * `block_size` - size of each sampled block in bytes; must be non-zero.
/// * `sample_count` - maximum number of blocks to sample (default `20`); clamped to the
///   number of blocks in `dest`.
/// * `threshold` - change ratio at or below which `use_delta` is set (default `0.75`).
///
/// # Errors
///
/// Returns `InvalidInput` when `block_size` is zero, and propagates any I/O error raised
/// while opening, inspecting, seeking, or reading either file.
pub fn estimate_change_ratio(
    source: &Path,
    dest: &Path,
    block_size: usize,
    sample_count: Option<usize>,
    threshold: Option<f64>,
) -> std::io::Result<ChangeRatioResult> {
    // Fills `buf` from `reader`, returning how many bytes were read. A single `read` call
    // may legally return fewer bytes than requested before EOF, so keep reading until the
    // buffer is full or the reader reports EOF; only then is a short count meaningful.
    fn read_block<R: Read>(reader: &mut R, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }
        Ok(filled)
    }

    // A zero block size would otherwise divide by zero when counting blocks.
    if block_size == 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "block_size must be greater than zero",
        ));
    }

    let sample_count = sample_count.unwrap_or(20);
    let threshold = threshold.unwrap_or(0.75);

    let mut source_file = BufReader::with_capacity(256 * 1024, File::open(source)?);
    let mut dest_file = BufReader::with_capacity(256 * 1024, File::open(dest)?);
    let source_size = source_file.get_ref().metadata()?.len();
    let dest_size = dest_file.get_ref().metadata()?.len();

    // Cheap pre-check: a large size difference already implies a large change, so skip
    // sampling altogether. An empty `dest` is treated as fully changed.
    let size_diff_ratio = if dest_size > 0 {
        (source_size as f64 - dest_size as f64).abs() / dest_size as f64
    } else {
        1.0
    };
    if size_diff_ratio > 0.5 {
        tracing::debug!("Size differs by {:.1}%, assuming high change ratio", size_diff_ratio * 100.0);
        return Ok(ChangeRatioResult::new(size_diff_ratio.min(1.0), 0, 0, threshold));
    }

    // Block arithmetic is done in u64 so large files do not truncate or overflow on
    // targets where `usize` is narrower than the file size.
    let block_len = block_size as u64;
    let total_blocks = dest_size.div_ceil(block_len);
    // The minimum never exceeds the requested count, so narrowing back to usize is lossless.
    let sample_count = (sample_count as u64).min(total_blocks) as usize;
    let last_block = total_blocks.saturating_sub(1);
    let gaps = sample_count.saturating_sub(1) as u128;

    let mut blocks_changed: usize = 0;
    let mut source_block = vec![0u8; block_size];
    let mut dest_block = vec![0u8; block_size];
    for i in 0..sample_count {
        // Interpolate between the first and the last block so the samples cover the whole
        // file; a truncated integer step would cluster them near the start. The product is
        // widened to u128 to rule out overflow, and the quotient never exceeds `last_block`.
        let block_idx = if gaps > 0 {
            (i as u128 * u128::from(last_block) / gaps) as u64
        } else {
            0
        };
        // `block_idx < total_blocks`, so this offset is below `dest_size` and cannot overflow.
        let offset = block_idx * block_len;
        source_file.seek(SeekFrom::Start(offset))?;
        dest_file.seek(SeekFrom::Start(offset))?;
        let source_read = read_block(&mut source_file, &mut source_block)?;
        let dest_read = read_block(&mut dest_file, &mut dest_block)?;
        // Both blocks are already in memory, so compare them directly: this is exact and
        // cheaper than hashing each side. A length mismatch also counts as a change.
        if source_read != dest_read || source_block[..source_read] != dest_block[..dest_read] {
            blocks_changed += 1;
        }
    }

    let change_ratio = if sample_count > 0 {
        blocks_changed as f64 / sample_count as f64
    } else {
        0.0
    };
    tracing::debug!(
        "Sampled {} blocks: {} changed ({:.1}%), threshold {:.1}%",
        sample_count,
        blocks_changed,
        change_ratio * 100.0,
        threshold * 100.0
    );
    Ok(ChangeRatioResult::new(change_ratio, sample_count, blocks_changed, threshold))
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Size of the standard fixture file: one mebibyte.
    const MIB: usize = 1024 * 1024;
    /// Block size passed to `estimate_change_ratio` in every test.
    const BLOCK: usize = 64 * 1024;

    /// Writes the given contents to `source.bin` and `dest.bin` in a fresh temporary
    /// directory. The directory guard is returned so the files outlive the call.
    fn write_pair(
        source_bytes: &[u8],
        dest_bytes: &[u8],
    ) -> (TempDir, std::path::PathBuf, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let source = dir.path().join("source.bin");
        let dest = dir.path().join("dest.bin");
        std::fs::write(&source, source_bytes).unwrap();
        std::fs::write(&dest, dest_bytes).unwrap();
        (dir, source, dest)
    }

    /// Identical files report no changed blocks and select the delta path.
    #[test]
    fn test_no_changes() {
        let contents = vec![42u8; MIB];
        let (_dir, source, dest) = write_pair(&contents, &contents);

        let outcome = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert_eq!(outcome.blocks_changed, 0);
        assert_eq!(outcome.change_ratio, 0.0);
        assert!(outcome.use_delta);
    }

    /// Files that differ in every byte report every sampled block as changed.
    #[test]
    fn test_all_changed() {
        let (_dir, source, dest) = write_pair(&vec![42u8; MIB], &vec![99u8; MIB]);

        let outcome = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert_eq!(outcome.blocks_changed, outcome.blocks_sampled);
        assert_eq!(outcome.change_ratio, 1.0);
        assert!(!outcome.use_delta);
    }

    /// Changing only the first 256 KiB yields a ratio strictly between 0 and 1.
    #[test]
    fn test_partial_change() {
        let mut source_bytes = vec![42u8; MIB];
        source_bytes[..256 * 1024].fill(99);
        let (_dir, source, dest) = write_pair(&source_bytes, &vec![42u8; MIB]);

        let outcome = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert!(outcome.blocks_changed > 0);
        assert!(outcome.blocks_changed < outcome.blocks_sampled);
        assert!(outcome.change_ratio > 0.0);
        assert!(outcome.change_ratio < 1.0);
        assert!(outcome.use_delta);
    }

    /// The same pair of files flips between delta and no-delta depending on the threshold.
    #[test]
    fn test_threshold_decision() {
        let mut source_bytes = vec![42u8; MIB];
        source_bytes[..800 * 1024].fill(99);
        let (_dir, source, dest) = write_pair(&source_bytes, &vec![42u8; MIB]);

        let with_default = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();
        assert!(!with_default.use_delta);

        let with_high = estimate_change_ratio(&source, &dest, BLOCK, None, Some(0.90)).unwrap();
        assert!(with_high.use_delta);
    }

    /// A source twice the size of the destination rejects the delta path.
    #[test]
    fn test_size_difference() {
        let (_dir, source, dest) = write_pair(&vec![42u8; 2 * MIB], &vec![42u8; MIB]);

        let outcome = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert!(!outcome.use_delta);
    }

    /// An explicit sample count below the block count is honoured as given.
    #[test]
    fn test_small_sample_count() {
        let contents = vec![42u8; MIB];
        let (_dir, source, dest) = write_pair(&contents, &contents);

        let outcome = estimate_change_ratio(&source, &dest, BLOCK, Some(5), None).unwrap();

        assert_eq!(outcome.blocks_sampled, 5);
        assert_eq!(outcome.blocks_changed, 0);
        assert!(outcome.use_delta);
    }
}