msy 0.4.2

Modern musl rsync alternative - Fast, parallel file synchronization
Documentation
use super::Adler32;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{self, Read, Seek};
use std::path::Path;

/// Block checksum containing both weak and strong hashes
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlockChecksum {
    /// Block index (0-based)
    pub index: u64,
    /// Byte offset in file
    pub offset: u64,
    /// Block size in bytes
    pub size: usize,
    /// Weak rolling checksum (Adler-32)
    pub weak: u32,
    /// Strong checksum (xxHash3)
    pub strong: u64,
}

/// Compute checksums for all blocks in a file
///
/// This is called on the destination file to create a checksum map
/// that the source can use to find matching blocks.
///
/// Uses parallel processing for 2-4x speedup on large files (>100MB).
/// Each thread processes blocks independently with its own file handle.
#[allow(dead_code)] // Reserved for future remote sync implementation
pub fn compute_checksums(path: &Path, block_size: usize) -> io::Result<Vec<BlockChecksum>> {
    // Get file size to determine number of blocks
    let metadata = std::fs::metadata(path)?;
    let file_size = metadata.len();

    if file_size == 0 {
        return Ok(Vec::new());
    }

    // Calculate number of blocks
    let num_blocks = file_size.div_ceil(block_size as u64);

    // Process blocks in parallel using rayon
    // Each thread gets its own file handle for independent I/O
    let path_buf = path.to_path_buf();
    let checksums: io::Result<Vec<BlockChecksum>> = (0..num_blocks)
        .into_par_iter()
        .map(|index| {
            // Each thread opens its own file handle
            let mut file = File::open(&path_buf)?;
            let offset = index * block_size as u64;

            // Seek to block position
            file.seek(io::SeekFrom::Start(offset))?;

            // Read block (may be partial for last block)
            let mut buffer = vec![0u8; block_size];
            let bytes_read = file.read(&mut buffer)?;
            let block = &buffer[..bytes_read];

            // Compute weak checksum (Adler-32)
            let weak = Adler32::hash(block);

            // Compute strong checksum (xxHash3)
            let mut hasher = xxhash_rust::xxh3::Xxh3::new();
            hasher.update(block);
            let strong = hasher.digest();

            Ok(BlockChecksum {
                index,
                offset,
                size: bytes_read,
                weak,
                strong,
            })
        })
        .collect();

    checksums
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    #[test]
    fn test_compute_checksums() {
        // Create test file (51 bytes)
        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file
            .write_all(b"Hello, World! This is a test file for checksumming.")
            .unwrap();
        temp_file.flush().unwrap();

        // Compute checksums
        let checksums = compute_checksums(temp_file.path(), 16).unwrap();

        // Should have ceil(51 / 16) = 4 blocks (3 full + 1 partial)
        assert_eq!(checksums.len(), 4);

        // Check first block
        assert_eq!(checksums[0].index, 0);
        assert_eq!(checksums[0].offset, 0);
        assert_eq!(checksums[0].size, 16);

        // Check last block (partial - 3 bytes)
        let last = &checksums[3];
        assert_eq!(last.index, 3);
        assert_eq!(last.offset, 48);
        assert_eq!(last.size, 3); // "ng."
    }

    #[test]
    fn test_empty_file() {
        let temp_file = NamedTempFile::new().unwrap();
        let checksums = compute_checksums(temp_file.path(), 1024).unwrap();
        assert_eq!(checksums.len(), 0);
    }

    #[test]
    fn test_checksums_deterministic() {
        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(b"test data").unwrap();
        temp_file.flush().unwrap();

        let checksums1 = compute_checksums(temp_file.path(), 4).unwrap();
        let checksums2 = compute_checksums(temp_file.path(), 4).unwrap();

        assert_eq!(checksums1, checksums2);
    }

    #[test]
    fn test_different_block_sizes() {
        let mut temp_file = NamedTempFile::new().unwrap();
        let data = b"a".repeat(100);
        temp_file.write_all(&data).unwrap();
        temp_file.flush().unwrap();

        let checksums_small = compute_checksums(temp_file.path(), 10).unwrap();
        let checksums_large = compute_checksums(temp_file.path(), 50).unwrap();

        assert_eq!(checksums_small.len(), 10); // 100 / 10 = 10 blocks
        assert_eq!(checksums_large.len(), 2); // 100 / 50 = 2 blocks
    }
}