firecloud_storage/
chunker.rs1use crate::StorageResult;
4use fastcdc::v2020::FastCDC;
5use firecloud_core::{Chunk, ChunkHash, ChunkMetadata, CompressionType};
6use std::io::Read;
7use tracing::debug;
8
/// Size bounds that drive content-defined chunking.
///
/// The three values are forwarded unchanged to `FastCDC::new` whenever a
/// `FileChunker` splits data. They are not validated by this type.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly,
/// e.g. in tests or when detecting a settings change.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkingConfig {
    /// Minimum chunk size in bytes (64 KiB in the default configuration).
    pub min_size: u32,
    /// Target average chunk size in bytes (1 MiB in the default configuration).
    pub avg_size: u32,
    /// Maximum chunk size in bytes (4 MiB in the default configuration).
    pub max_size: u32,
}
19
20impl Default for ChunkingConfig {
21 fn default() -> Self {
22 Self {
23 min_size: 64 * 1024, avg_size: 1024 * 1024, max_size: 4 * 1024 * 1024, }
27 }
28}
29
30pub struct FileChunker {
32 config: ChunkingConfig,
33}
34
35impl FileChunker {
36 pub fn new() -> Self {
38 Self {
39 config: ChunkingConfig::default(),
40 }
41 }
42
43 pub fn with_config(config: ChunkingConfig) -> Self {
45 Self { config }
46 }
47
48 pub fn chunk_reader<R: Read>(&self, mut reader: R) -> StorageResult<Vec<Chunk>> {
50 let mut data = Vec::new();
52 reader.read_to_end(&mut data)?;
53
54 self.chunk_bytes(&data)
55 }
56
57 pub fn chunk_bytes(&self, data: &[u8]) -> StorageResult<Vec<Chunk>> {
59 if data.is_empty() {
60 return Ok(Vec::new());
61 }
62
63 let chunker = FastCDC::new(
64 data,
65 self.config.min_size,
66 self.config.avg_size,
67 self.config.max_size,
68 );
69
70 let mut chunks = Vec::new();
71
72 for chunk_data in chunker {
73 let chunk_bytes = &data[chunk_data.offset..chunk_data.offset + chunk_data.length];
74 let hash = ChunkHash::hash(chunk_bytes);
75
76 debug!(
77 "Chunk: offset={}, size={}, hash={}",
78 chunk_data.offset,
79 chunk_data.length,
80 hash
81 );
82
83 let chunk = Chunk {
84 metadata: ChunkMetadata {
85 hash,
86 size: chunk_data.length as u64,
87 original_size: chunk_data.length as u64,
88 compression: CompressionType::None,
89 encrypted: false,
90 },
91 data: bytes::Bytes::copy_from_slice(chunk_bytes),
92 };
93
94 chunks.push(chunk);
95 }
96
97 debug!("Chunked {} bytes into {} chunks", data.len(), chunks.len());
98
99 Ok(chunks)
100 }
101
102 pub fn reassemble(chunks: &[Chunk]) -> bytes::Bytes {
104 let total_size: usize = chunks.iter().map(|c| c.data.len()).sum();
105 let mut result = Vec::with_capacity(total_size);
106
107 for chunk in chunks {
108 result.extend_from_slice(&chunk.data);
109 }
110
111 bytes::Bytes::from(result)
112 }
113}
114
115impl Default for FileChunker {
116 fn default() -> Self {
117 Self::new()
118 }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Small size bounds so that modest inputs are split into many chunks.
    ///
    /// NOTE(review): 64 / 256 / 1024 are believed to be the lower limits
    /// accepted by `fastcdc::v2020::FastCDC::new` — confirm against the
    /// pinned crate version.
    fn tiny_config() -> ChunkingConfig {
        ChunkingConfig {
            min_size: 64,
            avg_size: 256,
            max_size: 1024,
        }
    }

    /// Deterministic, non-constant input of the requested length.
    fn sample_data(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i % 251) as u8).collect()
    }

    #[test]
    fn test_chunk_small_file() {
        let chunker = FileChunker::new();
        let data = b"Hello, FireCloud! This is a test file.";

        let chunks = chunker.chunk_bytes(data).unwrap();

        // Input far below the minimum chunk size must stay in one piece.
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].data.as_ref(), data);
    }

    #[test]
    fn test_reassemble() {
        let chunker = FileChunker::new();

        let data: Vec<u8> = (0..200_000).map(|i| (i % 256) as u8).collect();

        let chunks = chunker.chunk_bytes(&data).unwrap();
        let reassembled = FileChunker::reassemble(&chunks);

        assert_eq!(reassembled.as_ref(), data.as_slice());
    }

    #[test]
    fn test_reassemble_multiple_chunks() {
        // With the default bounds the input above may well be a single chunk,
        // so use small bounds to force a genuine multi-chunk round trip.
        let config = tiny_config();
        let max_len = config.max_size as usize;
        let chunker = FileChunker::with_config(config);
        let data = sample_data(16 * 1024);

        let chunks = chunker.chunk_bytes(&data).unwrap();

        assert!(chunks.len() > 1);
        for chunk in &chunks {
            assert!(chunk.data.len() <= max_len);
            assert_eq!(chunk.metadata.size, chunk.data.len() as u64);
            assert_eq!(chunk.metadata.original_size, chunk.data.len() as u64);
        }

        let reassembled = FileChunker::reassemble(&chunks);
        assert_eq!(reassembled.as_ref(), data.as_slice());
    }

    #[test]
    fn test_chunk_reader_matches_chunk_bytes() {
        let chunker = FileChunker::with_config(tiny_config());
        let data = sample_data(8 * 1024);

        let from_bytes = chunker.chunk_bytes(&data).unwrap();
        // `&[u8]` implements `Read`, so the slice doubles as an in-memory reader.
        let from_reader = chunker.chunk_reader(data.as_slice()).unwrap();

        assert_eq!(from_reader.len(), from_bytes.len());
        for (read, direct) in from_reader.iter().zip(&from_bytes) {
            assert_eq!(read.data, direct.data);
        }
    }

    #[test]
    fn test_empty_file() {
        let chunker = FileChunker::new();
        let chunks = chunker.chunk_bytes(&[]).unwrap();
        assert!(chunks.is_empty());
    }
}