use crate::error::Result;
use crate::hash::Hash;
use crate::object::ChunkEntry;
/// Size limits handed to the FastCDC chunker by `chunk_file`.
///
/// All three values are byte counts. They are forwarded unchanged to
/// `FastCDC::new` as its minimum, average, and maximum chunk size.
///
/// NOTE(review): the chunker presumably expects
/// `min_size <= avg_size <= max_size` and may reject values outside its
/// supported range — confirm against the `fastcdc` documentation before
/// building a configuration from untrusted input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkerConfig {
    /// Minimum chunk size in bytes.
    pub min_size: usize,
    /// Target (average) chunk size in bytes.
    pub avg_size: usize,
    /// Maximum chunk size in bytes.
    pub max_size: usize,
}

impl Default for ChunkerConfig {
    /// Returns the default limits: 256 KiB minimum, 512 KiB average and
    /// 1 MiB maximum.
    fn default() -> Self {
        const KIB: usize = 1024;

        Self {
            min_size: 256 * KIB,
            avg_size: 512 * KIB,
            max_size: 1024 * KIB,
        }
    }
}
/// Splits `data` into content-defined chunks and describes each one.
///
/// The input is cut with the `fastcdc::ronomon` FastCDC implementation, using
/// the minimum, average, and maximum sizes from `config`. Each chunk's bytes
/// are hashed with `Hash::hash_bytes`, and the hash is stored together with
/// the chunk length in a `ChunkEntry`. Entries are returned in the order the
/// chunker yields them.
///
/// # Errors
///
/// The code in this function has no failing path and always returns `Ok`.
///
/// # Panics
///
/// NOTE(review): `FastCDC::new` may panic when the configured sizes fall
/// outside the range it supports — confirm against the `fastcdc` docs.
pub fn chunk_file(data: &[u8], config: &ChunkerConfig) -> Result<Vec<ChunkEntry>> {
    use fastcdc::ronomon::FastCDC;

    let entries: Vec<ChunkEntry> =
        FastCDC::new(data, config.min_size, config.avg_size, config.max_size)
            .map(|piece| {
                // The chunker reports an offset and a length into `data`.
                let end = piece.offset + piece.length;
                ChunkEntry {
                    hash: Hash::hash_bytes(&data[piece.offset..end]),
                    size: piece.length as u64,
                }
            })
            .collect();

    Ok(entries)
}
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Input length shared by the fixed-size tests (2 MiB).
    const TWO_MIB: usize = 2 * 1024 * 1024;

    /// A 2 MiB run of zeros is split into several chunks that add up to the
    /// input length and all respect the configured size limits.
    #[test]
    fn test_chunk_file_basic() {
        let data = vec![0u8; TWO_MIB];
        let config = ChunkerConfig::default();
        let chunks = chunk_file(&data, &config).unwrap();

        let count = chunks.len();
        assert!(count >= 2, "Expected at least 2 chunks, got {}", count);

        let total_size: u64 = chunks.iter().map(|entry| entry.size).sum();
        assert_eq!(total_size, data.len() as u64);

        let lower = config.min_size as u64;
        let upper = config.max_size as u64;
        for entry in &chunks {
            assert!(
                entry.size >= lower,
                "Chunk size {} is below minimum {}",
                entry.size,
                config.min_size
            );
            assert!(
                entry.size <= upper,
                "Chunk size {} exceeds maximum {}",
                entry.size,
                config.max_size
            );
        }
    }

    /// A repeating 0..=255 byte pattern yields chunks within the limits; a
    /// chunk spanning the whole input is exempt from the minimum.
    #[test]
    fn test_chunk_boundaries() {
        let data: Vec<u8> = (0..=u8::MAX).cycle().take(TWO_MIB).collect();
        let config = ChunkerConfig::default();
        let chunks = chunk_file(&data, &config).unwrap();

        let whole_input = data.len() as u64;
        for entry in &chunks {
            let meets_minimum = entry.size >= config.min_size as u64;
            assert!(
                meets_minimum || entry.size == whole_input,
                "Chunk size {} violates min size {}",
                entry.size,
                config.min_size
            );
            assert!(
                entry.size <= config.max_size as u64,
                "Chunk size {} exceeds max size {}",
                entry.size,
                config.max_size
            );
        }
    }

    /// Chunking the same bytes twice gives the same hashes and sizes.
    #[test]
    fn test_deterministic() {
        let data = vec![42u8; TWO_MIB];
        let config = ChunkerConfig::default();

        let first = chunk_file(&data, &config).unwrap();
        let second = chunk_file(&data, &config).unwrap();

        assert_eq!(
            first.len(),
            second.len(),
            "Chunk count should be deterministic"
        );
        for (a, b) in first.iter().zip(&second) {
            assert_eq!(a.hash, b.hash, "Chunk hashes should be deterministic");
            assert_eq!(a.size, b.size, "Chunk sizes should be deterministic");
        }
    }

    /// An input smaller than the default minimum becomes exactly one chunk.
    #[test]
    fn test_small_file() {
        let data = vec![0u8; 100 * 1024];
        let config = ChunkerConfig::default();
        let chunks = chunk_file(&data, &config).unwrap();

        assert_eq!(chunks.len(), 1, "Small file should create single chunk");
        assert_eq!(chunks[0].size, data.len() as u64);
    }

    /// An empty input produces no chunks.
    #[test]
    fn test_empty_file() {
        let data: Vec<u8> = Vec::new();
        let config = ChunkerConfig::default();
        let chunks = chunk_file(&data, &config).unwrap();

        assert_eq!(chunks.len(), 0, "Empty file should create no chunks");
    }

    proptest! {
        #![proptest_config(ProptestConfig {
            // Chunking is expensive, so only a small number of cases is run.
            cases: 32,
            max_shrink_iters: 5000,
            ..ProptestConfig::default()
        })]

        // No chunk exceeds the maximum, and every chunk except the final one
        // reaches the minimum.
        #[test]
        fn prop_chunk_sizes_bounded(
            data in prop::collection::vec(any::<u8>(), 0..2_000_000)
        ) {
            let config = ChunkerConfig::default();
            let chunks = chunk_file(&data, &config)?;

            for (i, chunk) in chunks.iter().enumerate() {
                prop_assert!(
                    chunk.size <= config.max_size as u64,
                    "Chunk {} size {} exceeds max {}",
                    i, chunk.size, config.max_size
                );

                // True for every index except the last one.
                if i + 1 < chunks.len() {
                    prop_assert!(
                        chunk.size >= config.min_size as u64,
                        "Chunk {} size {} below min {} (not last chunk)",
                        i, chunk.size, config.min_size
                    );
                }
            }
        }

        // Two runs over the same random bytes agree chunk by chunk.
        #[test]
        fn prop_chunking_deterministic(
            data in prop::collection::vec(any::<u8>(), 0..2_000_000)
        ) {
            let config = ChunkerConfig::default();
            let first = chunk_file(&data, &config)?;
            let second = chunk_file(&data, &config)?;

            prop_assert_eq!(first.len(), second.len(), "Chunk count must be deterministic");
            for (i, (a, b)) in first.iter().zip(&second).enumerate() {
                prop_assert_eq!(a.hash, b.hash, "Chunk {} hash must be deterministic", i);
                prop_assert_eq!(a.size, b.size, "Chunk {} size must be deterministic", i);
            }
        }

        // The chunk sizes always add up to the input length.
        #[test]
        fn prop_total_size_preserved(
            data in prop::collection::vec(any::<u8>(), 0..2_000_000)
        ) {
            let config = ChunkerConfig::default();
            let chunks = chunk_file(&data, &config)?;

            let total_size: u64 = chunks.iter().map(|entry| entry.size).sum();
            prop_assert_eq!(
                total_size,
                data.len() as u64,
                "Total chunk size must equal input size"
            );
        }
    }
}