// vflight 0.9.2
//
// Share files over the Veilid distributed network with content-addressable storage.
//! File chunking and reassembly with BLAKE3 hashing.
//!
//! This module handles splitting large files into fixed-size chunks
//! and reassembling them, with cryptographic hashing for integrity verification.

use anyhow::{Context, Result};
use std::path::Path;
use std::time::Instant;
use tracing::{debug, instrument};

use crate::metrics::{global_metrics, MetricCategory};
use crate::protocol::CHUNK_SIZE;

/// A single file chunk with its index, data, and hash.
///
/// Produced by [`chunk_data`] / [`chunk_file`] and consumed by
/// [`reassemble_chunks`]. The `hash` is computed from `data` alone,
/// so identical chunk contents always yield identical hashes.
///
/// # Example
///
/// ```
/// let chunk = vflight::Chunk {
///     index: 0,
///     data: vec![1, 2, 3, 4, 5],
///     hash: "abc123".to_string(),
/// };
/// ```
#[derive(Debug, Clone)]
pub struct Chunk {
    /// Zero-based index of this chunk in the file
    pub index: u64,
    /// Raw chunk data (up to CHUNK_SIZE bytes; the final chunk may be shorter)
    pub data: Vec<u8>,
    /// BLAKE3 hash of the chunk data in hex format
    pub hash: String,
}

/// Split raw data into fixed-size chunks with BLAKE3 hashes.
///
/// Each chunk holds at most `CHUNK_SIZE` bytes; the final chunk may be
/// shorter. Empty input yields an empty vector. Per-chunk hash timing is
/// recorded in the global metrics registry.
#[instrument(level = "debug", skip(data), fields(data_len = data.len()))]
pub fn chunk_data(data: &[u8]) -> Vec<Chunk> {
    let pieces = data.chunks(CHUNK_SIZE);
    // The chunk count is known up front (`Chunks` is an ExactSizeIterator),
    // so preallocate instead of growing the vector repeatedly.
    let mut chunks = Vec::with_capacity(pieces.len());

    for (i, piece) in pieces.enumerate() {
        // Time only the hash computation for the HashCompute metric.
        let hash_start = Instant::now();
        let hash = blake3::hash(piece).to_hex().to_string();
        global_metrics().record(
            MetricCategory::HashCompute,
            hash_start.elapsed(),
            piece.len() as u64,
        );

        chunks.push(Chunk {
            index: i as u64,
            data: piece.to_vec(),
            hash,
        });
    }

    debug!(num_chunks = chunks.len(), "Chunking complete");
    chunks
}

/// Split a file into fixed-size chunks with BLAKE3 hashes.
///
/// Reads the entire file into memory before chunking, so memory use is
/// proportional to the file size.
///
/// # Arguments
///
/// * `path` - Path to the file to chunk
///
/// # Returns
///
/// A vector of `Chunk` objects, or an error if the file cannot be read.
///
/// # Errors
///
/// Returns an error (with the path attached as context) if the file
/// cannot be read.
///
/// # Example
///
/// ```no_run
/// use vflight::chunk_file;
/// use std::path::Path;
///
/// # fn example() -> anyhow::Result<()> {
/// let chunks = chunk_file(Path::new("myfile.bin"))?;
/// for chunk in chunks {
///     println!("Chunk {}: {}", chunk.index, chunk.hash);
/// }
/// # Ok(())
/// # }
/// ```
#[instrument(level = "debug", skip(path), fields(file_path = %path.display()))]
pub fn chunk_file(path: &Path) -> Result<Vec<Chunk>> {
    // Time the whole read for the FileIO metric.
    let read_start = Instant::now();
    let data =
        std::fs::read(path).with_context(|| format!("Failed to read file: {}", path.display()))?;
    global_metrics().record(
        MetricCategory::FileIO,
        read_start.elapsed(),
        data.len() as u64,
    );
    debug!(file_size = data.len(), "File read successfully");

    Ok(chunk_data(&data))
}

/// Reassemble chunks back into a file.
///
/// Chunks are concatenated in slice order; callers must pass them sorted
/// by `index`. Chunk hashes are NOT verified here — integrity checking is
/// the caller's responsibility.
///
/// # Arguments
///
/// * `chunks` - Slice of chunks to concatenate (must already be in order)
/// * `output_path` - Path where the reassembled file should be written
///
/// # Returns
///
/// Ok(()) on success, or an error if the file cannot be written.
///
/// # Errors
///
/// Returns an error (with the path attached as context) if the output
/// file cannot be written.
///
/// # Example
///
/// ```no_run
/// use vflight::{reassemble_chunks, Chunk};
/// use std::path::Path;
///
/// # fn example() -> anyhow::Result<()> {
/// let chunks = vec![
///     Chunk { index: 0, data: vec![1u8, 2, 3], hash: String::new() },
///     Chunk { index: 1, data: vec![4u8, 5, 6], hash: String::new() },
/// ];
/// reassemble_chunks(&chunks, Path::new("output.bin"))?;
/// # Ok(())
/// # }
/// ```
#[instrument(level = "debug", skip(chunks, output_path), fields(num_chunks = chunks.len(), output_path = %output_path.display()))]
pub fn reassemble_chunks(chunks: &[Chunk], output_path: &Path) -> Result<()> {
    // Total size is known up front, so reserve once instead of growing.
    let total_size: usize = chunks.iter().map(|c| c.data.len()).sum();
    let mut file_data = Vec::with_capacity(total_size);
    for chunk in chunks {
        file_data.extend_from_slice(&chunk.data);
    }
    debug!(total_size = file_data.len(), "Chunks concatenated");

    // Time file write
    let write_start = Instant::now();
    std::fs::write(output_path, &file_data)
        .with_context(|| format!("Failed to write output file: {}", output_path.display()))?;
    global_metrics().record(
        MetricCategory::FileIO,
        write_start.elapsed(),
        file_data.len() as u64,
    );
    debug!("File written successfully");
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A file smaller than CHUNK_SIZE yields exactly one chunk.
    #[test]
    fn test_chunk_file_single_chunk() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("small.txt");
        let contents = b"Hello, World!";
        fs::write(&path, contents).unwrap();

        let result = chunk_file(&path).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].index, 0);
        assert_eq!(result[0].data, contents);
    }

    /// A file spanning several chunk boundaries yields sequentially
    /// indexed chunks (3 full chunks plus a 1000-byte remainder = 4).
    #[test]
    fn test_chunk_file_multiple_chunks() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("large.bin");
        let contents = vec![42u8; CHUNK_SIZE * 3 + 1000];
        fs::write(&path, &contents).unwrap();

        let result = chunk_file(&path).unwrap();
        assert_eq!(result.len(), 4);

        // Indices must run 0..n in order.
        for (expected, chunk) in result.iter().enumerate() {
            assert_eq!(chunk.index, expected as u64);
        }
    }

    /// Hashing the same content twice produces identical digests.
    #[test]
    fn test_chunk_hash_consistency() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("hash_test.txt");
        fs::write(&path, b"test data").unwrap();

        let first = chunk_file(&path).unwrap();
        let second = chunk_file(&path).unwrap();

        assert_eq!(first[0].hash, second[0].hash);
    }

    /// Concatenating in-order chunks reproduces the original bytes.
    #[test]
    fn test_reassemble_chunks() {
        let dir = TempDir::new().unwrap();
        let pieces = vec![
            Chunk {
                index: 0,
                data: vec![1u8, 2, 3, 4, 5],
                hash: String::new(),
            },
            Chunk {
                index: 1,
                data: vec![6u8, 7, 8, 9, 10],
                hash: String::new(),
            },
        ];

        let out_path = dir.path().join("reassembled.bin");
        reassemble_chunks(&pieces, &out_path).unwrap();

        let rebuilt = fs::read(&out_path).unwrap();
        assert_eq!(rebuilt, vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    /// Chunking a missing path surfaces the underlying I/O error.
    #[test]
    fn test_chunk_nonexistent_file() {
        let missing = std::path::Path::new("/tmp/nonexistent_file_xyz.txt");
        assert!(chunk_file(missing).is_err());
    }

    /// An empty file yields zero chunks.
    #[test]
    fn test_chunk_empty_file() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("empty.txt");
        fs::write(&path, b"").unwrap();

        assert!(chunk_file(&path).unwrap().is_empty());
    }
}