use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
/// Outcome of a sampled comparison between two files.
#[derive(Debug, Clone)]
pub struct ChangeRatioResult {
    /// Fraction of the comparison considered changed (`0.0` = nothing, `1.0` = everything).
    pub change_ratio: f64,
    /// Number of blocks that were compared.
    pub blocks_sampled: usize,
    /// Number of compared blocks that differed.
    pub blocks_changed: usize,
    /// `true` when `change_ratio` is less than or equal to `threshold`.
    pub use_delta: bool,
    /// Cut-off that `change_ratio` was compared against to derive `use_delta`.
    pub threshold: f64,
}

impl ChangeRatioResult {
    /// Builds a result and derives `use_delta` from the ratio and the threshold.
    ///
    /// `use_delta` is set when `change_ratio <= threshold`; a NaN ratio or
    /// threshold therefore yields `use_delta == false`.
    pub fn new(
        change_ratio: f64,
        blocks_sampled: usize,
        blocks_changed: usize,
        threshold: f64,
    ) -> Self {
        Self {
            use_delta: change_ratio <= threshold,
            change_ratio,
            blocks_sampled,
            blocks_changed,
            threshold,
        }
    }

    /// Renders the change ratio as a percentage with one decimal place, e.g. `"25.0%"`.
    pub fn change_ratio_percent(&self) -> String {
        let percent = self.change_ratio * 100.0;
        format!("{:.1}%", percent)
    }
}
/// Reads from `reader` until `buf` is full or end-of-file is reached.
///
/// A single `Read::read` call is allowed to return fewer bytes than requested
/// even when more data is available, so the call is repeated until the buffer
/// is full or a read returns zero bytes. Returns the number of bytes stored in `buf`.
fn read_block_fully<R: Read>(reader: &mut R, buf: &mut [u8]) -> std::io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            // An interrupted read transferred nothing; simply retry.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }
    Ok(filled)
}

/// Estimates how much of `dest` differs from `source` by comparing a sample of blocks.
///
/// If the file sizes differ by more than 50% of the size of `dest` (an empty
/// `dest` counts as a 100% difference), no blocks are read and the size
/// difference, capped at `1.0`, is reported as the change ratio with zero
/// blocks sampled.
///
/// Otherwise up to `sample_count` blocks of `block_size` bytes, spread evenly
/// from the first to the last block of `dest`, are read from both files at the
/// same offsets and compared byte for byte. The change ratio is the fraction
/// of sampled blocks that differ.
///
/// # Arguments
///
/// * `source` / `dest` - the two files to compare.
/// * `block_size` - size in bytes of each compared block; must be non-zero.
/// * `sample_count` - maximum number of blocks to compare (default `20`);
///   never more than the number of blocks in `dest`.
/// * `threshold` - change ratio up to which `use_delta` is set (default `0.75`).
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when `block_size` is zero, and propagates
/// any I/O error raised while opening, inspecting, seeking or reading either file.
pub fn estimate_change_ratio(
    source: &Path,
    dest: &Path,
    block_size: usize,
    sample_count: Option<usize>,
    threshold: Option<f64>,
) -> std::io::Result<ChangeRatioResult> {
    // A zero block size would otherwise divide by zero when counting blocks.
    if block_size == 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "block_size must be greater than zero",
        ));
    }

    let sample_count = sample_count.unwrap_or(20);
    let threshold = threshold.unwrap_or(0.75);
    let mut source_file = BufReader::with_capacity(256 * 1024, File::open(source)?);
    let mut dest_file = BufReader::with_capacity(256 * 1024, File::open(dest)?);
    let source_size = source_file.get_ref().metadata()?.len();
    let dest_size = dest_file.get_ref().metadata()?.len();

    let size_diff_ratio = if dest_size > 0 {
        (source_size as f64 - dest_size as f64).abs() / dest_size as f64
    } else {
        1.0
    };
    if size_diff_ratio > 0.5 {
        tracing::debug!(
            "Size differs by {:.1}%, assuming high change ratio",
            size_diff_ratio * 100.0
        );
        return Ok(ChangeRatioResult::new(
            size_diff_ratio.min(1.0),
            0,
            0,
            threshold,
        ));
    }

    // Block arithmetic is done in u64 so large files do not truncate or
    // overflow on targets where `usize` is narrower than 64 bits.
    // `usize` -> `u64` is a widening conversion on all supported targets.
    let block_len = block_size as u64;
    let total_blocks = dest_size.div_ceil(block_len);
    let last_block = total_blocks.saturating_sub(1);
    let sample_count = match usize::try_from(total_blocks) {
        Ok(total) => sample_count.min(total),
        // More blocks than `usize` can count: the requested count is the smaller value.
        Err(_) => sample_count,
    };

    let mut blocks_changed = 0usize;
    let mut source_block = vec![0u8; block_size];
    let mut dest_block = vec![0u8; block_size];
    for i in 0..sample_count {
        // Interpolate between block 0 and the last block so the samples cover
        // the whole file; a truncated integer step would leave the tail of the
        // file unsampled. u128 keeps the product from overflowing.
        let block_idx = if sample_count > 1 {
            let scaled = i as u128 * u128::from(last_block) / (sample_count as u128 - 1);
            // `scaled` never exceeds `last_block`, so the conversion cannot fail.
            u64::try_from(scaled).unwrap_or(last_block)
        } else {
            0
        };
        // block_idx <= last_block, so this offset is below `dest_size` and cannot overflow.
        let offset = block_idx * block_len;
        source_file.seek(SeekFrom::Start(offset))?;
        dest_file.seek(SeekFrom::Start(offset))?;
        let source_read = read_block_fully(&mut source_file, &mut source_block)?;
        let dest_read = read_block_fully(&mut dest_file, &mut dest_block)?;
        // Slices of different lengths compare unequal, so a short block in
        // either file (e.g. at end-of-file) also counts as changed.
        if source_block[..source_read] != dest_block[..dest_read] {
            blocks_changed += 1;
        }
    }

    let change_ratio = if sample_count > 0 {
        blocks_changed as f64 / sample_count as f64
    } else {
        0.0
    };
    tracing::debug!(
        "Sampled {} blocks: {} changed ({:.1}%), threshold {:.1}%",
        sample_count,
        blocks_changed,
        change_ratio * 100.0,
        threshold * 100.0
    );
    Ok(ChangeRatioResult::new(
        change_ratio,
        sample_count,
        blocks_changed,
        threshold,
    ))
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// One mebibyte, the size of most fixture files below.
    const MIB: usize = 1024 * 1024;
    /// Block size passed to `estimate_change_ratio` in every test.
    const BLOCK: usize = 64 * 1024;

    /// Writes the two byte slices to `source.bin` and `dest.bin` inside a fresh
    /// temporary directory and returns the directory guard with both paths.
    /// Callers keep the guard bound for as long as they use the paths.
    fn write_fixture(
        source_data: &[u8],
        dest_data: &[u8],
    ) -> (TempDir, std::path::PathBuf, std::path::PathBuf) {
        let temp = TempDir::new().unwrap();
        let source = temp.path().join("source.bin");
        let dest = temp.path().join("dest.bin");
        std::fs::write(&source, source_data).unwrap();
        std::fs::write(&dest, dest_data).unwrap();
        (temp, source, dest)
    }

    /// Identical files report no changed blocks and select the delta path.
    #[test]
    fn test_no_changes() {
        let data = vec![42u8; MIB];
        let (_temp, source, dest) = write_fixture(&data, &data);

        let result = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert_eq!(result.blocks_changed, 0);
        assert_eq!(result.change_ratio, 0.0);
        assert!(result.use_delta);
    }

    /// Files that differ in every byte report a ratio of 1.0 and reject delta.
    #[test]
    fn test_all_changed() {
        let (_temp, source, dest) = write_fixture(&vec![42u8; MIB], &vec![99u8; MIB]);

        let result = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert_eq!(result.blocks_changed, result.blocks_sampled);
        assert_eq!(result.change_ratio, 1.0);
        assert!(!result.use_delta);
    }

    /// Changing the first quarter of the source stays below the default threshold.
    #[test]
    fn test_partial_change() {
        let mut source_data = vec![42u8; MIB];
        source_data[..256 * 1024].fill(99);
        let (_temp, source, dest) = write_fixture(&source_data, &vec![42u8; MIB]);

        let result = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert!(result.blocks_changed > 0);
        assert!(result.blocks_changed < result.blocks_sampled);
        assert!(result.change_ratio > 0.0);
        assert!(result.change_ratio < 1.0);
        assert!(result.use_delta);
    }

    /// The same ~80% change flips the decision depending on the threshold.
    #[test]
    fn test_threshold_decision() {
        let mut source_data = vec![42u8; MIB];
        source_data[..800 * 1024].fill(99);
        let (_temp, source, dest) = write_fixture(&source_data, &vec![42u8; MIB]);

        // Default threshold (0.75) is below the observed ratio.
        let strict = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();
        assert!(!strict.use_delta);

        // A 0.90 threshold is above it.
        let lenient = estimate_change_ratio(&source, &dest, BLOCK, None, Some(0.90)).unwrap();
        assert!(lenient.use_delta);
    }

    /// A source twice the size of the destination is rejected for delta.
    #[test]
    fn test_size_difference() {
        let (_temp, source, dest) = write_fixture(&vec![42u8; 2 * MIB], &vec![42u8; MIB]);

        let result = estimate_change_ratio(&source, &dest, BLOCK, None, None).unwrap();

        assert!(!result.use_delta);
    }

    /// An explicit sample count smaller than the block count is honoured.
    #[test]
    fn test_small_sample_count() {
        let data = vec![42u8; MIB];
        let (_temp, source, dest) = write_fixture(&data, &data);

        let result = estimate_change_ratio(&source, &dest, BLOCK, Some(5), None).unwrap();

        assert_eq!(result.blocks_sampled, 5);
        assert_eq!(result.blocks_changed, 0);
        assert!(result.use_delta);
    }
}