use super::Adler32;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{self, Read, Seek};
use std::path::Path;
/// Location and checksums of a single block of a file.
///
/// `compute_checksums` produces one value of this type per block.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlockChecksum {
/// Zero-based position of the block within the file.
pub index: u64,
/// Byte offset of the block's first byte (`index * block_size`).
pub offset: u64,
/// Number of bytes in the block; the final block may be shorter than the block size.
pub size: usize,
/// Weak checksum of the block's bytes, as returned by `Adler32::hash`.
pub weak: u32,
/// Strong checksum of the block's bytes: the 64-bit XXH3 digest.
pub strong: u64,
}
/// Computes the weak and strong checksum of every `block_size`-byte block of a file.
///
/// The file at `path` is split into consecutive blocks of `block_size` bytes (the
/// last block holds whatever remains). Blocks are hashed in parallel; each worker
/// opens its own handle and seeks to the block's offset, so no file cursor is shared.
/// The returned vector is ordered by block index.
///
/// An empty file yields an empty vector regardless of `block_size`.
///
/// # Errors
///
/// Returns [`io::ErrorKind::InvalidInput`] if `block_size` is zero and the file is
/// not empty, and propagates any I/O error raised while reading metadata, opening,
/// seeking or reading the file.
// NOTE(review): presumably kept because not every build configuration calls this yet —
// confirm and drop the attribute once it has a caller everywhere.
#[allow(dead_code)]
pub fn compute_checksums(path: &Path, block_size: usize) -> io::Result<Vec<BlockChecksum>> {
    let file_size = std::fs::metadata(path)?.len();
    if file_size == 0 {
        return Ok(Vec::new());
    }
    // Checked after the empty-file shortcut so that every call that used to succeed
    // still does; a zero block size previously panicked in `div_ceil` below.
    if block_size == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "block_size must be greater than zero",
        ));
    }

    let block_len = block_size as u64;
    let num_blocks = file_size.div_ceil(block_len);

    (0..num_blocks)
        .into_par_iter()
        .map(|index| {
            let offset = index * block_len;
            let mut file = File::open(path)?;
            file.seek(io::SeekFrom::Start(offset))?;

            // Never larger than `block_size`, so the cast back to `usize` is lossless.
            // Sizing by the bytes that remain avoids a full-block allocation for the tail.
            let expected = block_len.min(file_size - offset) as usize;
            let mut buffer = Vec::with_capacity(expected);
            // A single `read` may return fewer bytes than requested even before EOF;
            // `read_to_end` on a `take` adapter keeps reading until the block limit or EOF.
            file.take(block_len).read_to_end(&mut buffer)?;

            let weak = Adler32::hash(&buffer);
            let mut hasher = xxhash_rust::xxh3::Xxh3::new();
            hasher.update(&buffer);
            let strong = hasher.digest();

            Ok(BlockChecksum {
                index,
                offset,
                size: buffer.len(),
                weak,
                strong,
            })
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding `contents`, flushed so it can be read back by path.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file.flush().unwrap();
        file
    }

    /// A 51-byte file with 16-byte blocks gives three full blocks and a 3-byte tail.
    #[test]
    fn test_compute_checksums() {
        let file = temp_file_with(b"Hello, World! This is a test file for checksumming.");
        let blocks = compute_checksums(file.path(), 16).unwrap();
        assert_eq!(blocks.len(), 4);

        let first = &blocks[0];
        assert_eq!(first.index, 0);
        assert_eq!(first.offset, 0);
        assert_eq!(first.size, 16);

        let tail = &blocks[3];
        assert_eq!(tail.index, 3);
        assert_eq!(tail.offset, 48);
        assert_eq!(tail.size, 3);
    }

    /// An empty file produces no blocks.
    #[test]
    fn test_empty_file() {
        let file = NamedTempFile::new().unwrap();
        let blocks = compute_checksums(file.path(), 1024).unwrap();
        assert_eq!(blocks.len(), 0);
    }

    /// Hashing the same file twice gives identical results.
    #[test]
    fn test_checksums_deterministic() {
        let file = temp_file_with(b"test data");
        let first_run = compute_checksums(file.path(), 4).unwrap();
        let second_run = compute_checksums(file.path(), 4).unwrap();
        assert_eq!(first_run, second_run);
    }

    /// The number of blocks follows from the file size and the block size.
    #[test]
    fn test_different_block_sizes() {
        let file = temp_file_with(&vec![b'a'; 100]);
        let small_blocks = compute_checksums(file.path(), 10).unwrap();
        let large_blocks = compute_checksums(file.path(), 50).unwrap();
        assert_eq!(small_blocks.len(), 10);
        assert_eq!(large_blocks.len(), 2);
    }
}