use crate::StorageResult;
use fastcdc::v2020::FastCDC;
use firecloud_core::{Chunk, ChunkHash, ChunkMetadata, CompressionType};
use std::io::Read;
use tracing::debug;
/// Size bounds handed to the FastCDC chunker when splitting input.
///
/// The three values are forwarded unchanged as the minimum, average and
/// maximum chunk size arguments of the chunker.
#[derive(Clone, Debug)]
pub struct ChunkingConfig {
    /// Smallest chunk size the chunker is asked to produce.
    pub min_size: u32,
    /// Target (average) chunk size.
    pub avg_size: u32,
    /// Largest chunk size the chunker is asked to produce.
    pub max_size: u32,
}
impl Default for ChunkingConfig {
fn default() -> Self {
Self {
min_size: 64 * 1024, avg_size: 1024 * 1024, max_size: 4 * 1024 * 1024, }
}
}
/// Splits byte input into content-defined chunks according to a
/// [`ChunkingConfig`].
pub struct FileChunker {
    /// Size bounds forwarded to the chunker on every call.
    config: ChunkingConfig,
}
impl FileChunker {
    /// Creates a chunker that uses [`ChunkingConfig::default`].
    pub fn new() -> Self {
        Self::with_config(ChunkingConfig::default())
    }

    /// Creates a chunker that uses the supplied size bounds.
    pub fn with_config(config: ChunkingConfig) -> Self {
        Self { config }
    }

    /// Drains `reader` into memory and chunks the collected bytes.
    ///
    /// The whole input is buffered before chunking starts, so memory use
    /// grows with the size of the stream.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading from `reader`, as well as
    /// any error returned by [`FileChunker::chunk_bytes`].
    pub fn chunk_reader<R: Read>(&self, mut reader: R) -> StorageResult<Vec<Chunk>> {
        let mut buffer = Vec::new();
        reader.read_to_end(&mut buffer)?;
        self.chunk_bytes(buffer.as_slice())
    }

    /// Splits `data` into chunks using FastCDC with the configured sizes.
    ///
    /// Empty input yields an empty vector. Every produced chunk owns a copy
    /// of its bytes and carries metadata with the chunk hash, `size` and
    /// `original_size` both equal to the chunk length, no compression and
    /// `encrypted` set to `false`. One debug event is logged per chunk and
    /// one summary event at the end.
    ///
    /// NOTE(review): the FastCDC constructor is expected to reject
    /// out-of-range size bounds by panicking; confirm against the fastcdc
    /// documentation before accepting untrusted configs.
    pub fn chunk_bytes(&self, data: &[u8]) -> StorageResult<Vec<Chunk>> {
        if data.is_empty() {
            return Ok(Vec::new());
        }

        let sizes = &self.config;
        let chunks: Vec<Chunk> = FastCDC::new(data, sizes.min_size, sizes.avg_size, sizes.max_size)
            .map(|cut| {
                let start = cut.offset;
                let end = start + cut.length;
                let payload = &data[start..end];
                let hash = ChunkHash::hash(payload);
                debug!(
                    "Chunk: offset={}, size={}, hash={}",
                    cut.offset,
                    cut.length,
                    hash
                );

                // Nothing is compressed here, so stored and original sizes match.
                let stored_len = cut.length as u64;
                Chunk {
                    metadata: ChunkMetadata {
                        hash,
                        size: stored_len,
                        original_size: stored_len,
                        compression: CompressionType::None,
                        encrypted: false,
                    },
                    data: bytes::Bytes::copy_from_slice(payload),
                }
            })
            .collect();

        debug!("Chunked {} bytes into {} chunks", data.len(), chunks.len());
        Ok(chunks)
    }

    /// Concatenates the payloads of `chunks`, in slice order, into one buffer.
    ///
    /// No hash or metadata validation is performed; only `data` is read.
    pub fn reassemble(chunks: &[Chunk]) -> bytes::Bytes {
        let expected_len: usize = chunks.iter().map(|piece| piece.data.len()).sum();
        let mut joined = Vec::with_capacity(expected_len);
        chunks
            .iter()
            .for_each(|piece| joined.extend_from_slice(&piece.data));
        bytes::Bytes::from(joined)
    }
}
impl Default for FileChunker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic 200 000-byte pattern shared by the round-trip tests.
    fn patterned_input() -> Vec<u8> {
        (0..200_000).map(|i| (i % 256) as u8).collect()
    }

    /// Input far smaller than the default minimum chunk size comes back as a
    /// single chunk holding the exact bytes.
    #[test]
    fn test_chunk_small_file() {
        let chunker = FileChunker::new();
        let data = b"Hello, FireCloud! This is a test file.";
        let chunks = chunker.chunk_bytes(data).unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].data.as_ref(), data);
    }

    /// Chunking with the default config and reassembling restores the input.
    #[test]
    fn test_reassemble() {
        let chunker = FileChunker::new();
        let data = patterned_input();
        let chunks = chunker.chunk_bytes(&data).unwrap();
        let reassembled = FileChunker::reassemble(&chunks);
        assert_eq!(reassembled.as_ref(), data.as_slice());
    }

    /// With a 64 KiB maximum chunk size the 200 000-byte input cannot fit in
    /// one chunk, so this exercises the ordering of concatenation in
    /// `reassemble`, which the default-config test does not guarantee.
    #[test]
    fn test_reassemble_multiple_chunks() {
        let chunker = FileChunker::with_config(ChunkingConfig {
            min_size: 4 * 1024,
            avg_size: 16 * 1024,
            max_size: 64 * 1024,
        });
        let data = patterned_input();
        let chunks = chunker.chunk_bytes(&data).unwrap();
        assert!(chunks.len() > 1);
        for chunk in &chunks {
            assert_eq!(chunk.metadata.size, chunk.data.len() as u64);
        }
        let reassembled = FileChunker::reassemble(&chunks);
        assert_eq!(reassembled.as_ref(), data.as_slice());
    }

    /// Empty input produces no chunks at all.
    #[test]
    fn test_empty_file() {
        let chunker = FileChunker::new();
        let chunks = chunker.chunk_bytes(&[]).unwrap();
        assert!(chunks.is_empty());
    }
}