use anyhow::{Context, Result};
use std::path::Path;
use std::time::Instant;
use tracing::{debug, instrument};
use crate::metrics::{global_metrics, MetricCategory};
use crate::protocol::CHUNK_SIZE;
/// A single fixed-size piece of a larger byte stream.
///
/// Produced by [`chunk_data`] / [`chunk_file`]; consumed by
/// [`reassemble_chunks`], which concatenates chunk payloads in order.
#[derive(Debug, Clone)]
pub struct Chunk {
// Zero-based position of this chunk within the original data.
pub index: u64,
// Raw payload bytes; every chunk holds CHUNK_SIZE bytes except
// possibly the last, which holds the remainder.
pub data: Vec<u8>,
// Lowercase hex BLAKE3 digest of `data`, for integrity checks.
pub hash: String,
}
/// Splits `data` into `CHUNK_SIZE`-byte chunks, hashing each with BLAKE3.
///
/// Every chunk except possibly the last is exactly `CHUNK_SIZE` bytes; the
/// last carries the remainder. Empty input yields an empty vector. Hash
/// computation time is recorded in the global metrics registry per chunk.
#[instrument(level = "debug", skip(data), fields(data_len = data.len()))]
pub fn chunk_data(data: &[u8]) -> Vec<Chunk> {
    // Preallocate exactly ceil(len / CHUNK_SIZE) slots (0 for empty input)
    // instead of growing the Vec repeatedly. CHUNK_SIZE is a nonzero
    // constant, so the manual ceiling division is safe.
    let num_chunks = (data.len() + CHUNK_SIZE - 1) / CHUNK_SIZE;
    let mut chunks = Vec::with_capacity(num_chunks);
    for (i, piece) in data.chunks(CHUNK_SIZE).enumerate() {
        let hash_start = Instant::now();
        let hash = blake3::hash(piece).to_hex().to_string();
        // Record how long hashing this chunk took, weighted by its size.
        global_metrics().record(
            MetricCategory::HashCompute,
            hash_start.elapsed(),
            piece.len() as u64,
        );
        chunks.push(Chunk {
            index: i as u64,
            data: piece.to_vec(),
            hash,
        });
    }
    debug!(num_chunks = chunks.len(), "Chunking complete");
    chunks
}
#[instrument(level = "debug", skip(path), fields(file_path = %path.display()))]
pub fn chunk_file(path: &Path) -> Result<Vec<Chunk>> {
let read_start = Instant::now();
let data =
std::fs::read(path).with_context(|| format!("Failed to read file: {}", path.display()))?;
global_metrics().record(
MetricCategory::FileIO,
read_start.elapsed(),
data.len() as u64,
);
debug!(file_size = data.len(), "File read successfully");
Ok(chunk_data(&data))
}
/// Concatenates chunk payloads in slice order and writes the result to
/// `output_path`, overwriting any existing file.
///
/// NOTE(review): chunks are concatenated in the order given — they are NOT
/// sorted by `Chunk::index` here; callers appear responsible for ordering.
///
/// # Errors
/// Returns an error (with the path attached as context) if the output file
/// cannot be written. Write duration and byte count are recorded in the
/// global metrics registry under `FileIO`.
#[instrument(level = "debug", skip(chunks, output_path), fields(num_chunks = chunks.len(), output_path = %output_path.display()))]
pub fn reassemble_chunks(chunks: &[Chunk], output_path: &Path) -> Result<()> {
    // Preallocate the full output size so the buffer never reallocates
    // while appending chunk payloads.
    let total_size: usize = chunks.iter().map(|c| c.data.len()).sum();
    let mut file_data = Vec::with_capacity(total_size);
    for chunk in chunks {
        file_data.extend_from_slice(&chunk.data);
    }
    debug!(total_size = file_data.len(), "Chunks concatenated");
    let write_start = Instant::now();
    std::fs::write(output_path, &file_data)
        .with_context(|| format!("Failed to write output file: {}", output_path.display()))?;
    global_metrics().record(
        MetricCategory::FileIO,
        write_start.elapsed(),
        file_data.len() as u64,
    );
    debug!("File written successfully");
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A file smaller than CHUNK_SIZE yields exactly one chunk holding the
    /// whole payload.
    #[test]
    fn test_chunk_file_single_chunk() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("small.txt");
        let test_data = b"Hello, World!";
        fs::write(&test_file, test_data).unwrap();
        let chunks = chunk_file(&test_file).unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].index, 0);
        assert_eq!(chunks[0].data, test_data);
    }

    /// 3 full chunks plus a 1000-byte remainder splits into 4 chunks with
    /// correct indices AND correct sizes (the size checks catch a wrong
    /// split point, which index checks alone would miss).
    #[test]
    fn test_chunk_file_multiple_chunks() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("large.bin");
        let test_data = vec![42u8; CHUNK_SIZE * 3 + 1000];
        fs::write(&test_file, &test_data).unwrap();
        let chunks = chunk_file(&test_file).unwrap();
        assert_eq!(chunks.len(), 4);
        for (i, chunk) in chunks.iter().enumerate() {
            assert_eq!(chunk.index, i as u64);
        }
        // First three chunks are full; the final chunk holds the remainder.
        for chunk in &chunks[..3] {
            assert_eq!(chunk.data.len(), CHUNK_SIZE);
        }
        assert_eq!(chunks[3].data.len(), 1000);
    }

    /// Hashing the same content twice must produce identical, non-empty
    /// digests (the non-empty check guards against a trivially-passing
    /// empty-string hash).
    #[test]
    fn test_chunk_hash_consistency() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("hash_test.txt");
        let test_data = b"test data";
        fs::write(&test_file, test_data).unwrap();
        let chunks1 = chunk_file(&test_file).unwrap();
        let chunks2 = chunk_file(&test_file).unwrap();
        assert!(!chunks1[0].hash.is_empty());
        assert_eq!(chunks1[0].hash, chunks2[0].hash);
    }

    /// Reassembling two in-order chunks reproduces the original bytes.
    #[test]
    fn test_reassemble_chunks() {
        let temp_dir = TempDir::new().unwrap();
        let original_data = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        let chunks = vec![
            Chunk {
                index: 0,
                data: vec![1u8, 2, 3, 4, 5],
                hash: String::new(),
            },
            Chunk {
                index: 1,
                data: vec![6u8, 7, 8, 9, 10],
                hash: String::new(),
            },
        ];
        let output_file = temp_dir.path().join("reassembled.bin");
        reassemble_chunks(&chunks, &output_file).unwrap();
        let reassembled_data = fs::read(&output_file).unwrap();
        assert_eq!(reassembled_data, original_data);
    }

    /// A missing input file surfaces as an Err, not a panic.
    #[test]
    fn test_chunk_nonexistent_file() {
        let result = chunk_file(std::path::Path::new("/tmp/nonexistent_file_xyz.txt"));
        assert!(result.is_err());
    }

    /// An empty file yields zero chunks.
    #[test]
    fn test_chunk_empty_file() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("empty.txt");
        fs::write(&test_file, b"").unwrap();
        let chunks = chunk_file(&test_file).unwrap();
        assert_eq!(chunks.len(), 0);
    }
}